Blame SOURCES/013-rolling-upgrade-monitor.patch

46037d
From a35dfe0b76555f30dda4c9d96630866de40322b3 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 14:40:24 -0500
46037d
Subject: [PATCH 01/24] Low: fencing: use a default timeout with metadata and
46037d
 validate
46037d
46037d
If the caller did not specify a timeout, use a default in
46037d
stonith_api_operations_t:metadata() and validate(). (Timeout is currently
46037d
ignored past that point, so this has no effect yet.)
46037d
46037d
Also, rename timeout argument for clarity.
46037d
---
46037d
 lib/fencing/st_client.c | 23 ++++++++++++++++-------
46037d
 1 file changed, 16 insertions(+), 7 deletions(-)
46037d
46037d
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
46037d
index 2b0d308..28791ff 100644
46037d
--- a/lib/fencing/st_client.c
46037d
+++ b/lib/fencing/st_client.c
46037d
@@ -504,7 +504,8 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names
46037d
 
46037d
 static int
46037d
 stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
46037d
-                            const char *namespace, char **output, int timeout)
46037d
+                            const char *namespace, char **output,
46037d
+                            int timeout_sec)
46037d
 {
46037d
     /* By executing meta-data directly, we can get it from stonith_admin when
46037d
      * the cluster is not running, which is important for higher-level tools.
46037d
@@ -512,16 +513,20 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a
46037d
 
46037d
     enum stonith_namespace ns = stonith_get_namespace(agent, namespace);
46037d
 
46037d
+    if (timeout_sec <= 0) {
46037d
+        timeout_sec = CRMD_METADATA_CALL_TIMEOUT;
46037d
+    }
46037d
+
46037d
     crm_trace("Looking up metadata for %s agent %s",
46037d
               stonith_namespace2text(ns), agent);
46037d
 
46037d
     switch (ns) {
46037d
         case st_namespace_rhcs:
46037d
-            return stonith__rhcs_metadata(agent, timeout, output);
46037d
+            return stonith__rhcs_metadata(agent, timeout_sec, output);
46037d
 
46037d
 #if HAVE_STONITH_STONITH_H
46037d
         case st_namespace_lha:
46037d
-            return stonith__lha_metadata(agent, timeout, output);
46037d
+            return stonith__lha_metadata(agent, timeout_sec, output);
46037d
 #endif
46037d
 
46037d
         default:
46037d
@@ -1684,8 +1689,8 @@ stonith_api_delete(stonith_t * stonith)
46037d
 static int
46037d
 stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
46037d
                      const char *namespace_s, const char *agent,
46037d
-                     stonith_key_value_t *params, int timeout, char **output,
46037d
-                     char **error_output)
46037d
+                     stonith_key_value_t *params, int timeout_sec,
46037d
+                     char **output, char **error_output)
46037d
 {
46037d
     /* Validation should be done directly via the agent, so we can get it from
46037d
      * stonith_admin when the cluster is not running, which is important for
46037d
@@ -1731,17 +1736,21 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
46037d
         *error_output = NULL;
46037d
     }
46037d
 
46037d
+    if (timeout_sec <= 0) {
46037d
+        timeout_sec = CRMD_METADATA_CALL_TIMEOUT; // Questionable
46037d
+    }
46037d
+
46037d
     switch (stonith_get_namespace(agent, namespace_s)) {
46037d
         case st_namespace_rhcs:
46037d
             rc = stonith__rhcs_validate(st, call_options, target, agent,
46037d
-                                        params_table, host_arg, timeout,
46037d
+                                        params_table, host_arg, timeout_sec,
46037d
                                         output, error_output);
46037d
             break;
46037d
 
46037d
 #if HAVE_STONITH_STONITH_H
46037d
         case st_namespace_lha:
46037d
             rc = stonith__lha_validate(st, call_options, target, agent,
46037d
-                                       params_table, timeout, output,
46037d
+                                       params_table, timeout_sec, output,
46037d
                                        error_output);
46037d
             break;
46037d
 #endif
46037d
-- 
46037d
2.31.1
46037d
46037d
From c2a863b7daeb829c0210d87a2f1503c1cf4dc7a5 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 14:00:00 -0500
46037d
Subject: [PATCH 02/24] Doc: fencer: improve
46037d
 stonith_api_operations_t:metadata() description
46037d
46037d
---
46037d
 include/crm/stonith-ng.h | 15 +++++++++++----
46037d
 lib/fencing/st_client.c  |  7 ++++---
46037d
 2 files changed, 15 insertions(+), 7 deletions(-)
46037d
46037d
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
46037d
index 4fe52ef..a41d411 100644
46037d
--- a/include/crm/stonith-ng.h
46037d
+++ b/include/crm/stonith-ng.h
46037d
@@ -206,14 +206,21 @@ typedef struct stonith_api_operations_s
46037d
         stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
46037d
 
46037d
     /*!
46037d
-     * \brief Get the metadata documentation for a resource.
46037d
+     * \brief Retrieve a fence agent's metadata
46037d
      *
46037d
-     * \note Value is returned in output.  Output must be freed when set.
46037d
+     * \param[in,out] stonith       Fencer connection
46037d
+     * \param[in]     call_options  Group of enum stonith_call_options
46037d
+     *                              (currently ignored)
46037d
+     * \param[in]     agent         Fence agent to query
46037d
+     * \param[in]     namespace     Namespace of fence agent to query (optional)
46037d
+     * \param[out]    output        Where to store metadata
46037d
+     * \param[in]     timeout_sec   Error if not complete within this time
46037d
      *
46037d
      * \return Legacy Pacemaker return code
46037d
+     * \note The caller is responsible for freeing *output using free().
46037d
      */
46037d
-    int (*metadata)(stonith_t *st, int options,
46037d
-            const char *device, const char *provider, char **output, int timeout);
46037d
+    int (*metadata)(stonith_t *stonith, int call_options, const char *agent,
46037d
+                    const char *namespace, char **output, int timeout_sec);
46037d
 
46037d
     /*!
46037d
      * \brief Retrieve a list of installed stonith agents
46037d
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
46037d
index 28791ff..6c252bc 100644
46037d
--- a/lib/fencing/st_client.c
46037d
+++ b/lib/fencing/st_client.c
46037d
@@ -502,10 +502,11 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names
46037d
     return count;
46037d
 }
46037d
 
46037d
+// See stonith_api_operations_t:metadata() documentation
46037d
 static int
46037d
-stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
46037d
-                            const char *namespace, char **output,
46037d
-                            int timeout_sec)
46037d
+stonith_api_device_metadata(stonith_t *stonith, int call_options,
46037d
+                            const char *agent, const char *namespace,
46037d
+                            char **output, int timeout_sec)
46037d
 {
46037d
     /* By executing meta-data directly, we can get it from stonith_admin when
46037d
      * the cluster is not running, which is important for higher-level tools.
46037d
-- 
46037d
2.31.1
46037d
46037d
From 9beff34a0d39425ef470e59e251a8ca7c08e69a0 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 14:16:54 -0500
46037d
Subject: [PATCH 03/24] Doc: fencing: add doxygen block for
46037d
 stonith__action_create()
46037d
46037d
... and rename a couple arguments for clarity
46037d
---
46037d
 include/crm/fencing/internal.h |  4 ++--
46037d
 lib/fencing/st_actions.c       | 33 ++++++++++++++++++++++++---------
46037d
 2 files changed, 26 insertions(+), 11 deletions(-)
46037d
46037d
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
46037d
index d2b49f8..e2ca85e 100644
46037d
--- a/include/crm/fencing/internal.h
46037d
+++ b/include/crm/fencing/internal.h
46037d
@@ -50,10 +50,10 @@ struct stonith_action_s;
46037d
 typedef struct stonith_action_s stonith_action_t;
46037d
 
46037d
 stonith_action_t *stonith_action_create(const char *agent,
46037d
-                                        const char *_action,
46037d
+                                        const char *action_name,
46037d
                                         const char *victim,
46037d
                                         uint32_t victim_nodeid,
46037d
-                                        int timeout,
46037d
+                                        int timeout_sec,
46037d
                                         GHashTable * device_args,
46037d
                                         GHashTable * port_map,
46037d
                                         const char * host_arg);
46037d
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
46037d
index b3429f6..d16fa33 100644
46037d
--- a/lib/fencing/st_actions.c
46037d
+++ b/lib/fencing/st_actions.c
46037d
@@ -232,27 +232,42 @@ stonith__action_result(stonith_action_t *action)
46037d
 }
46037d
 
46037d
 #define FAILURE_MAX_RETRIES 2
46037d
+
46037d
+/*!
46037d
+ * \internal
46037d
+ * \brief Create a new fencing action to be executed
46037d
+ *
46037d
+ * \param[in] agent          Fence agent to use
46037d
+ * \param[in] action_name    Fencing action to be executed
46037d
+ * \param[in] victim         Name of target of fencing action (if known)
46037d
+ * \param[in] victim_nodeid  Node ID of target of fencing action (if known)
46037d
+ * \param[in] timeout_sec    Timeout to be used when executing action
46037d
+ * \param[in] device_args    Parameters to pass to fence agent
46037d
+ * \param[in] port_map       Mapping of target names to device ports
46037d
+ * \param[in] host_arg       Agent parameter used to pass target name
46037d
+ *
46037d
+ * \return Newly created fencing action (asserts on error, never NULL)
46037d
+ */
46037d
 stonith_action_t *
46037d
 stonith_action_create(const char *agent,
46037d
-                      const char *_action,
46037d
+                      const char *action_name,
46037d
                       const char *victim,
46037d
                       uint32_t victim_nodeid,
46037d
-                      int timeout, GHashTable * device_args,
46037d
+                      int timeout_sec, GHashTable * device_args,
46037d
                       GHashTable * port_map, const char *host_arg)
46037d
 {
46037d
-    stonith_action_t *action;
46037d
+    stonith_action_t *action = calloc(1, sizeof(stonith_action_t));
46037d
 
46037d
-    action = calloc(1, sizeof(stonith_action_t));
46037d
     CRM_ASSERT(action != NULL);
46037d
 
46037d
-    action->args = make_args(agent, _action, victim, victim_nodeid,
46037d
+    action->args = make_args(agent, action_name, victim, victim_nodeid,
46037d
                              device_args, port_map, host_arg);
46037d
     crm_debug("Preparing '%s' action for %s using agent %s",
46037d
-              _action, (victim? victim : "no target"), agent);
46037d
+              action_name, (victim? victim : "no target"), agent);
46037d
     action->agent = strdup(agent);
46037d
-    action->action = strdup(_action);
46037d
+    action->action = strdup(action_name);
46037d
     pcmk__str_update(&action->victim, victim);
46037d
-    action->timeout = action->remaining_timeout = timeout;
46037d
+    action->timeout = action->remaining_timeout = timeout_sec;
46037d
     action->max_retries = FAILURE_MAX_RETRIES;
46037d
 
46037d
     pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
46037d
@@ -262,7 +277,7 @@ stonith_action_create(const char *agent,
46037d
         char buffer[512];
46037d
         const char *value = NULL;
46037d
 
46037d
-        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
46037d
+        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name);
46037d
         value = g_hash_table_lookup(device_args, buffer);
46037d
 
46037d
         if (value) {
46037d
-- 
46037d
2.31.1
46037d
46037d
From 3001cb016eefff55c55e709247b0c14c331fb330 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 14:20:24 -0500
46037d
Subject: [PATCH 04/24] Low: fencing: use requested timeout with RHCS metadata
46037d
 actions
46037d
46037d
... instead of hardcoded 5 seconds, and rename timeout argument for clarity
46037d
---
46037d
 lib/fencing/st_rhcs.c | 35 ++++++++++++++++-------------------
46037d
 1 file changed, 16 insertions(+), 19 deletions(-)
46037d
46037d
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
46037d
index dfccff2..5e600d2 100644
46037d
--- a/lib/fencing/st_rhcs.c
46037d
+++ b/lib/fencing/st_rhcs.c
46037d
@@ -112,25 +112,24 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
46037d
 }
46037d
 
46037d
 /*!
46037d
- * \brief Execute RHCS-compatible agent's meta-data action
46037d
+ * \brief Execute RHCS-compatible agent's metadata action
46037d
  *
46037d
- * \param[in]  agent    Agent to execute
46037d
- * \param[in]  timeout  Action timeout
46037d
- * \param[out] metadata Where to store output xmlNode (or NULL to ignore)
46037d
- *
46037d
- * \todo timeout is currently ignored; shouldn't we use it?
46037d
+ * \param[in]  agent        Agent to execute
46037d
+ * \param[in]  timeout_sec  Action timeout
46037d
+ * \param[out] metadata     Where to store output xmlNode (or NULL to ignore)
46037d
  */
46037d
 static int
46037d
-stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
46037d
+stonith__rhcs_get_metadata(const char *agent, int timeout_sec,
46037d
+		           xmlNode **metadata)
46037d
 {
46037d
     xmlNode *xml = NULL;
46037d
     xmlNode *actions = NULL;
46037d
     xmlXPathObject *xpathObj = NULL;
46037d
-    pcmk__action_result_t *result = NULL;
46037d
-    stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0,
46037d
-                                                     5, NULL, NULL, NULL);
46037d
+    stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 
46037d
+                                                     0, timeout_sec, NULL,
46037d
+                                                     NULL, NULL);
46037d
     int rc = stonith__execute(action);
46037d
-    result = stonith__action_result(action);
46037d
+    pcmk__action_result_t *result = stonith__action_result(action);
46037d
 
46037d
     if (result == NULL) {
46037d
         if (rc < 0) {
46037d
@@ -208,21 +207,19 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
46037d
 }
46037d
 
46037d
 /*!
46037d
- * \brief Execute RHCS-compatible agent's meta-data action
46037d
- *
46037d
- * \param[in]  agent    Agent to execute
46037d
- * \param[in]  timeout  Action timeout
46037d
- * \param[out] output   Where to store action output (or NULL to ignore)
46037d
+ * \brief Retrieve metadata for RHCS-compatible fence agent
46037d
  *
46037d
- * \todo timeout is currently ignored; shouldn't we use it?
46037d
+ * \param[in]  agent        Agent to execute
46037d
+ * \param[in]  timeout_sec  Action timeout
46037d
+ * \param[out] output       Where to store action output (or NULL to ignore)
46037d
  */
46037d
 int
46037d
-stonith__rhcs_metadata(const char *agent, int timeout, char **output)
46037d
+stonith__rhcs_metadata(const char *agent, int timeout_sec, char **output)
46037d
 {
46037d
     char *buffer = NULL;
46037d
     xmlNode *xml = NULL;
46037d
 
46037d
-    int rc = stonith__rhcs_get_metadata(agent, timeout, &xml);
46037d
+    int rc = stonith__rhcs_get_metadata(agent, timeout_sec, &xml);
46037d
 
46037d
     if (rc != pcmk_ok) {
46037d
         free_xml(xml);
46037d
-- 
46037d
2.31.1
46037d
46037d
From 17dbf449d8b51ea27a89a13f47160a95b0a45149 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 14:32:44 -0500
46037d
Subject: [PATCH 05/24] Refactor: fencing: make stonith_action_t:async bool
46037d
46037d
---
46037d
 lib/fencing/st_actions.c | 5 +++--
46037d
 1 file changed, 3 insertions(+), 2 deletions(-)
46037d
46037d
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
46037d
index d16fa33..abd0d5a 100644
46037d
--- a/lib/fencing/st_actions.c
46037d
+++ b/lib/fencing/st_actions.c
46037d
@@ -9,6 +9,7 @@
46037d
 
46037d
 #include <crm_internal.h>
46037d
 
46037d
+#include <stdbool.h>
46037d
 #include <stdlib.h>
46037d
 #include <stdio.h>
46037d
 #include <string.h>
46037d
@@ -32,7 +33,7 @@ struct stonith_action_s {
46037d
     char *victim;
46037d
     GHashTable *args;
46037d
     int timeout;
46037d
-    int async;
46037d
+    bool async;
46037d
     void *userdata;
46037d
     void (*done_cb) (int pid, const pcmk__action_result_t *result,
46037d
                      void *user_data);
46037d
@@ -671,7 +672,7 @@ stonith_action_execute_async(stonith_action_t * action,
46037d
     action->userdata = userdata;
46037d
     action->done_cb = done;
46037d
     action->fork_cb = fork_cb;
46037d
-    action->async = 1;
46037d
+    action->async = true;
46037d
 
46037d
     return internal_stonith_action_execute(action);
46037d
 }
46037d
-- 
46037d
2.31.1
46037d
46037d
From 9b0f568dddc928104e6d2d54d5138e0c7ca5b537 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 14:59:28 -0500
46037d
Subject: [PATCH 06/24] Refactor: fencing: rename
46037d
 stonith_action_execute_async()
46037d
46037d
... to stonith__execute_async(), since it's internal
46037d
---
46037d
 daemons/fenced/fenced_commands.c |  4 ++--
46037d
 include/crm/fencing/internal.h   | 12 +++++-------
46037d
 lib/fencing/st_actions.c         | 11 +++++------
46037d
 3 files changed, 12 insertions(+), 15 deletions(-)
46037d
46037d
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
46037d
index 94aa6b8..41a1936 100644
46037d
--- a/daemons/fenced/fenced_commands.c
46037d
+++ b/daemons/fenced/fenced_commands.c
46037d
@@ -510,8 +510,8 @@ stonith_device_execute(stonith_device_t * device)
46037d
     /* for async exec, exec_rc is negative for early error exit
46037d
        otherwise handling of success/errors is done via callbacks */
46037d
     cmd->activating_on = device;
46037d
-    exec_rc = stonith_action_execute_async(action, (void *)cmd,
46037d
-                                           cmd->done_cb, fork_cb);
46037d
+    exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
46037d
+                                     fork_cb);
46037d
     if (exec_rc < 0) {
46037d
         cmd->activating_on = NULL;
46037d
         cmd->done_cb(0, stonith__action_result(action), cmd);
46037d
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
46037d
index e2ca85e..1797d9a 100644
46037d
--- a/include/crm/fencing/internal.h
46037d
+++ b/include/crm/fencing/internal.h
46037d
@@ -64,13 +64,11 @@ void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result);
46037d
 void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result);
46037d
 xmlNode *stonith__find_xe_with_result(xmlNode *xml);
46037d
 
46037d
-int
46037d
-stonith_action_execute_async(stonith_action_t * action,
46037d
-                             void *userdata,
46037d
-                             void (*done) (int pid,
46037d
-                                           const pcmk__action_result_t *result,
46037d
-                                           void *user_data),
46037d
-                             void (*fork_cb) (int pid, void *user_data));
46037d
+int stonith__execute_async(stonith_action_t *action, void *userdata,
46037d
+                           void (*done) (int pid,
46037d
+                                         const pcmk__action_result_t *result,
46037d
+                                         void *user_data),
46037d
+                           void (*fork_cb) (int pid, void *user_data));
46037d
 
46037d
 xmlNode *create_level_registration_xml(const char *node, const char *pattern,
46037d
                                        const char *attr, const char *value,
46037d
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
46037d
index abd0d5a..c4e32bd 100644
46037d
--- a/lib/fencing/st_actions.c
46037d
+++ b/lib/fencing/st_actions.c
46037d
@@ -658,12 +658,11 @@ internal_stonith_action_execute(stonith_action_t * action)
46037d
  * \return pcmk_ok if ownership of action has been taken, -errno otherwise
46037d
  */
46037d
 int
46037d
-stonith_action_execute_async(stonith_action_t * action,
46037d
-                             void *userdata,
46037d
-                             void (*done) (int pid,
46037d
-                                           const pcmk__action_result_t *result,
46037d
-                                           void *user_data),
46037d
-                             void (*fork_cb) (int pid, void *user_data))
46037d
+stonith__execute_async(stonith_action_t * action, void *userdata,
46037d
+                       void (*done) (int pid,
46037d
+                                     const pcmk__action_result_t *result,
46037d
+                                     void *user_data),
46037d
+                       void (*fork_cb) (int pid, void *user_data))
46037d
 {
46037d
     if (!action) {
46037d
         return -EINVAL;
46037d
-- 
46037d
2.31.1
46037d
46037d
From 1d8fbd12b302b5029a341f269bd00def79e6a0ea Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 16:43:57 -0500
46037d
Subject: [PATCH 07/24] Refactor: fencing: add internal API for getting
46037d
 metadata async
46037d
46037d
Nothing uses it yet
46037d
---
46037d
 include/crm/fencing/internal.h |  6 +++
46037d
 lib/fencing/st_client.c        | 80 ++++++++++++++++++++++++++++++++++
46037d
 2 files changed, 86 insertions(+)
46037d
46037d
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
46037d
index 1797d9a..513d1c4 100644
46037d
--- a/include/crm/fencing/internal.h
46037d
+++ b/include/crm/fencing/internal.h
46037d
@@ -70,6 +70,12 @@ int stonith__execute_async(stonith_action_t *action, void *userdata,
46037d
                                          void *user_data),
46037d
                            void (*fork_cb) (int pid, void *user_data));
46037d
 
46037d
+int stonith__metadata_async(const char *agent, int timeout_sec,
46037d
+                            void (*callback)(int pid,
46037d
+                                             const pcmk__action_result_t *result,
46037d
+                                             void *user_data),
46037d
+                            void *user_data);
46037d
+
46037d
 xmlNode *create_level_registration_xml(const char *node, const char *pattern,
46037d
                                        const char *attr, const char *value,
46037d
                                        int level,
46037d
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
46037d
index 6c252bc..91075bd 100644
46037d
--- a/lib/fencing/st_client.c
46037d
+++ b/lib/fencing/st_client.c
46037d
@@ -2386,6 +2386,86 @@ stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name,
46037d
     freeXpathObject(xpath);
46037d
 }
46037d
 
46037d
+/*!
46037d
+ * \internal
46037d
+ * \brief Retrieve fence agent meta-data asynchronously
46037d
+ *
46037d
+ * \param[in] agent        Agent to execute
46037d
+ * \param[in] timeout_sec  Error if not complete within this time
46037d
+ * \param[in] callback     Function to call with result (this will always be
46037d
+ *                         called, whether by this function directly or later
46037d
+ *                         via the main loop, and on success the metadata will
46037d
+ *                         be in its result argument's action_stdout)
46037d
+ * \param[in] user_data    User data to pass to callback
46037d
+ *
46037d
+ * \return Standard Pacemaker return code
46037d
+ * \note The caller must use a main loop. This function is not a
46037d
+ *       stonith_api_operations_t method because it does not need a stonith_t
46037d
+ *       object and does not go through the fencer, but executes the agent
46037d
+ *       directly.
46037d
+ */
46037d
+int
46037d
+stonith__metadata_async(const char *agent, int timeout_sec,
46037d
+                        void (*callback)(int pid,
46037d
+                                         const pcmk__action_result_t *result,
46037d
+                                         void *user_data),
46037d
+                        void *user_data)
46037d
+{
46037d
+    switch (stonith_get_namespace(agent, NULL)) {
46037d
+        case st_namespace_rhcs:
46037d
+            {
46037d
+                stonith_action_t *action = NULL;
46037d
+                int rc = pcmk_ok;
46037d
+
46037d
+                action = stonith_action_create(agent, "metadata", NULL, 0,
46037d
+                                               timeout_sec, NULL, NULL, NULL);
46037d
+
46037d
+                rc = stonith__execute_async(action, user_data, callback, NULL);
46037d
+                if (rc != pcmk_ok) {
46037d
+                    callback(0, stonith__action_result(action), user_data);
46037d
+                    stonith__destroy_action(action);
46037d
+                }
46037d
+                return pcmk_legacy2rc(rc);
46037d
+            }
46037d
+
46037d
+#if HAVE_STONITH_STONITH_H
46037d
+        case st_namespace_lha:
46037d
+            // LHA metadata is simply synthesized, so simulate async
46037d
+            {
46037d
+                pcmk__action_result_t result = {
46037d
+                    .exit_status = CRM_EX_OK,
46037d
+                    .execution_status = PCMK_EXEC_DONE,
46037d
+                    .exit_reason = NULL,
46037d
+                    .action_stdout = NULL,
46037d
+                    .action_stderr = NULL,
46037d
+                };
46037d
+
46037d
+                stonith__lha_metadata(agent, timeout_sec,
46037d
+                                      &result.action_stdout);
46037d
+                callback(0, &result, user_data);
46037d
+                pcmk__reset_result(&result);
46037d
+                return pcmk_rc_ok;
46037d
+            }
46037d
+#endif
46037d
+
46037d
+        default:
46037d
+            {
46037d
+                pcmk__action_result_t result = {
46037d
+                    .exit_status = CRM_EX_ERROR,
46037d
+                    .execution_status = PCMK_EXEC_ERROR_HARD,
46037d
+                    .exit_reason = crm_strdup_printf("No such agent '%s'",
46037d
+                                                     agent),
46037d
+                    .action_stdout = NULL,
46037d
+                    .action_stderr = NULL,
46037d
+                };
46037d
+
46037d
+                callback(0, &result, user_data);
46037d
+                pcmk__reset_result(&result);
46037d
+                return ENOENT;
46037d
+            }
46037d
+    }
46037d
+}
46037d
+
46037d
 /*!
46037d
  * \internal
46037d
  * \brief Return the exit status from an async action callback
46037d
-- 
46037d
2.31.1
46037d
46037d
From 1869cc181ef9599bd938fc545d302b2721169755 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 13 Sep 2022 17:33:10 -0500
46037d
Subject: [PATCH 08/24] Refactor: liblrmd: add internal API for getting
46037d
 metadata async
46037d
46037d
Nothing uses it yet
46037d
---
46037d
 include/crm/lrmd_internal.h |  10 +++-
46037d
 lib/lrmd/lrmd_client.c      | 115 ++++++++++++++++++++++++++++++++++++
46037d
 2 files changed, 123 insertions(+), 2 deletions(-)
46037d
46037d
diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h
46037d
index 284c4d6..5cb00d5 100644
46037d
--- a/include/crm/lrmd_internal.h
46037d
+++ b/include/crm/lrmd_internal.h
46037d
@@ -1,5 +1,5 @@
46037d
 /*
46037d
- * Copyright 2015-2021 the Pacemaker project contributors
46037d
+ * Copyright 2015-2022 the Pacemaker project contributors
46037d
  *
46037d
  * The version control history for this file may have further details.
46037d
  *
46037d
@@ -17,7 +17,7 @@
46037d
 #include <crm/common/mainloop.h>        // mainloop_io_t, ipc_client_callbacks
46037d
 #include <crm/common/output_internal.h> // pcmk__output_t
46037d
 #include <crm/common/remote_internal.h> // pcmk__remote_t
46037d
-#include <crm/lrmd.h>                   // lrmd_t, lrmd_event_data_t
46037d
+#include <crm/lrmd.h>           // lrmd_t, lrmd_event_data_t, lrmd_rsc_info_t
46037d
 
46037d
 int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port);
46037d
 
46037d
@@ -35,6 +35,12 @@ int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list,
46037d
 int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id,
46037d
                           const char *msg_type);
46037d
 
46037d
+int lrmd__metadata_async(lrmd_rsc_info_t *rsc,
46037d
+                         void (*callback)(int pid,
46037d
+                                          const pcmk__action_result_t *result,
46037d
+                                          void *user_data),
46037d
+                         void *user_data);
46037d
+
46037d
 void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc,
46037d
                       int op_status, const char *exit_reason);
46037d
 
46037d
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
46037d
index 82afd6c..4b16bf0 100644
46037d
--- a/lib/lrmd/lrmd_client.c
46037d
+++ b/lib/lrmd/lrmd_client.c
46037d
@@ -2343,6 +2343,121 @@ lrmd_api_delete(lrmd_t * lrmd)
46037d
     free(lrmd);
46037d
 }
46037d
 
46037d
+struct metadata_cb {
46037d
+     void (*callback)(int pid, const pcmk__action_result_t *result,
46037d
+                      void *user_data);
46037d
+     void *user_data;
46037d
+};
46037d
+
46037d
+/*!
46037d
+ * \internal
46037d
+ * \brief Process asynchronous metadata completion
46037d
+ *
46037d
+ * \param[in] action  Metadata action that completed
46037d
+ */
46037d
+static void
46037d
+metadata_complete(svc_action_t *action)
46037d
+{
46037d
+    struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data;
46037d
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
46037d
+
46037d
+    pcmk__set_result(&result, action->rc, action->status,
46037d
+                     services__exit_reason(action));
46037d
+    pcmk__set_result_output(&result, action->stdout_data, action->stderr_data);
46037d
+
46037d
+    metadata_cb->callback(0, &result, metadata_cb->user_data);
46037d
+    result.action_stdout = NULL; // Prevent free, because action owns it
46037d
+    result.action_stderr = NULL; // Prevent free, because action owns it
46037d
+    pcmk__reset_result(&result);
46037d
+    free(metadata_cb);
46037d
+}
46037d
+
46037d
+/*!
46037d
+ * \internal
46037d
+ * \brief Retrieve agent metadata asynchronously
46037d
+ *
46037d
+ * \param[in] rsc        Resource agent specification
46037d
+ * \param[in] callback   Function to call with result (this will always be
46037d
+ *                       called, whether by this function directly or later via
46037d
+ *                       the main loop, and on success the metadata will be in
46037d
+ *                       its result argument's action_stdout)
46037d
+ * \param[in] user_data  User data to pass to callback
46037d
+ *
46037d
+ * \return Standard Pacemaker return code
46037d
+ * \note This function is not a lrmd_api_operations_t method because it does not
46037d
+ *       need an lrmd_t object and does not go through the executor, but
46037d
+ *       executes the agent directly.
46037d
+ */
46037d
+int
46037d
+lrmd__metadata_async(lrmd_rsc_info_t *rsc,
46037d
+                     void (*callback)(int pid,
46037d
+                                      const pcmk__action_result_t *result,
46037d
+                                      void *user_data),
46037d
+                     void *user_data)
46037d
+{
46037d
+    svc_action_t *action = NULL;
46037d
+    struct metadata_cb *metadata_cb = NULL;
46037d
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
46037d
+
46037d
+    CRM_CHECK(callback != NULL, return EINVAL);
46037d
+
46037d
+    if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
46037d
+        pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR,
46037d
+                         "Invalid resource specification");
46037d
+        callback(0, &result, user_data);
46037d
+        pcmk__reset_result(&result);
46037d
+        return EINVAL;
46037d
+    }
46037d
+
46037d
+    if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) {
46037d
+        return stonith__metadata_async(rsc->type,
46037d
+                                       CRMD_METADATA_CALL_TIMEOUT / 1000,
46037d
+                                       callback, user_data);
46037d
+    }
46037d
+
46037d
+    action = services__create_resource_action(rsc->type, rsc->standard,
46037d
+                                              rsc->provider, rsc->type,
46037d
+                                              CRMD_ACTION_METADATA, 0,
46037d
+                                              CRMD_METADATA_CALL_TIMEOUT, NULL,
46037d
+                                              0);
46037d
+    if (action == NULL) {
46037d
+        pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
46037d
+                         "Out of memory");
46037d
+        callback(0, &result, user_data);
46037d
+        pcmk__reset_result(&result);
46037d
+        return ENOMEM;
46037d
+    }
46037d
+    if (action->rc != PCMK_OCF_UNKNOWN) {
46037d
+        pcmk__set_result(&result, action->rc, action->status,
46037d
+                         services__exit_reason(action));
46037d
+        callback(0, &result, user_data);
46037d
+        pcmk__reset_result(&result);
46037d
+        services_action_free(action);
46037d
+        return EINVAL;
46037d
+    }
46037d
+
46037d
+    action->cb_data = calloc(1, sizeof(struct metadata_cb));
46037d
+    if (action->cb_data == NULL) {
46037d
+        services_action_free(action);
46037d
+        pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
46037d
+                         "Out of memory");
46037d
+        callback(0, &result, user_data);
46037d
+        pcmk__reset_result(&result);
46037d
+        return ENOMEM;
46037d
+    }
46037d
+
46037d
+    metadata_cb = (struct metadata_cb *) action->cb_data;
46037d
+    metadata_cb->callback = callback;
46037d
+    metadata_cb->user_data = user_data;
46037d
+    if (!services_action_async(action, metadata_complete)) {
46037d
+        services_action_free(action);
46037d
+        return pcmk_rc_error; // @TODO Derive from action->rc and ->status
46037d
+    }
46037d
+
46037d
+    // The services library has taken responsibility for action
46037d
+    return pcmk_rc_ok;
46037d
+}
46037d
+
46037d
 /*!
46037d
  * \internal
46037d
  * \brief Set the result of an executor event
46037d
-- 
46037d
2.31.1
46037d
46037d
From de89164053cde8f44ca74a007703e0827ffd67ec Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Wed, 14 Sep 2022 16:34:37 -0500
46037d
Subject: [PATCH 09/24] Low: controller: ignore CRM_OP_LRM_REFRESH
46037d
46037d
This was only sent by crm_resource --refresh in versions 1.1.9 and earlier.
46037d
Since the local crm_resource is the same version as the controller, and
46037d
Pacemaker Remote was introduced in 1.1.9, this means that only remote nodes
46037d
running 1.1.9 can possibly send it.
46037d
46037d
It didn't really do anything useful anyway, so just ignore it.
46037d
---
46037d
 daemons/controld/controld_execd.c    | 33 +++++-----------------------
46037d
 daemons/controld/controld_messages.c |  2 +-
46037d
 include/crm/crm.h                    |  2 +-
46037d
 lib/pacemaker/pcmk_graph_producer.c  |  3 +--
46037d
 lib/pengine/common.c                 |  2 --
46037d
 5 files changed, 9 insertions(+), 33 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index fa411a6..719fab0 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -1553,32 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
46037d
     lrmd_free_event(op);
46037d
 }
46037d
 
46037d
-static void
46037d
-handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
46037d
-                  const char *from_host, const char *from_sys)
46037d
-{
46037d
-    int rc = pcmk_ok;
46037d
-    xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
46037d
-
46037d
-    fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
46037d
-    crm_info("Forced a local resource history refresh: call=%d", rc);
46037d
-
46037d
-    if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) {
46037d
-        xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
46037d
-                                        from_sys, CRM_SYSTEM_LRMD,
46037d
-                                        fsa_our_uuid);
46037d
-
46037d
-        crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
46037d
-
46037d
-        if (relay_message(reply, TRUE) == FALSE) {
46037d
-            crm_log_xml_err(reply, "Unable to route reply");
46037d
-        }
46037d
-        free_xml(reply);
46037d
-    }
46037d
-
46037d
-    free_xml(fragment);
46037d
-}
46037d
-
46037d
 static void
46037d
 handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
46037d
 {
46037d
@@ -1787,7 +1761,12 @@ do_lrm_invoke(long long action,
46037d
     }
46037d
 
46037d
     if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
46037d
-        handle_refresh_op(lrm_state, user_name, from_host, from_sys);
46037d
+        /* @COMPAT This can only be sent by crm_resource --refresh on a
46037d
+         * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
46037d
+         * unlikely. It previously would cause the controller to re-write its
46037d
+         * resource history to the CIB. Just ignore it.
46037d
+         */
46037d
+        crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
46037d
 
46037d
     } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) {
46037d
         handle_query_op(input->msg, lrm_state);
46037d
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
46037d
index 31d3524..957fc20 100644
46037d
--- a/daemons/controld/controld_messages.c
46037d
+++ b/daemons/controld/controld_messages.c
46037d
@@ -1061,7 +1061,7 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
46037d
         return handle_lrm_delete(stored_msg);
46037d
 
46037d
     } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
46037d
-               || (strcmp(op, CRM_OP_LRM_REFRESH) == 0)
46037d
+               || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
46037d
                || (strcmp(op, CRM_OP_REPROBE) == 0)) {
46037d
 
46037d
         crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
46037d
diff --git a/include/crm/crm.h b/include/crm/crm.h
46037d
index 5ec66d2..f2e536e 100644
46037d
--- a/include/crm/crm.h
46037d
+++ b/include/crm/crm.h
46037d
@@ -146,7 +146,7 @@ extern char *crm_system_name;
46037d
 #  define CRM_OP_REGISTER		"register"
46037d
 #  define CRM_OP_IPC_FWD		"ipc_fwd"
46037d
 #  define CRM_OP_INVOKE_LRM	"lrm_invoke"
46037d
-#  define CRM_OP_LRM_REFRESH	"lrm_refresh" /* Deprecated */
46037d
+#  define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
46037d
 #  define CRM_OP_LRM_QUERY	"lrm_query"
46037d
 #  define CRM_OP_LRM_DELETE	"lrm_delete"
46037d
 #  define CRM_OP_LRM_FAIL		"lrm_fail"
46037d
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
46037d
index 4c1b5a6..0077719 100644
46037d
--- a/lib/pacemaker/pcmk_graph_producer.c
46037d
+++ b/lib/pacemaker/pcmk_graph_producer.c
46037d
@@ -446,8 +446,7 @@ create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details,
46037d
 
46037d
     } else if (pcmk__str_any_of(action->task,
46037d
                                 CRM_OP_SHUTDOWN,
46037d
-                                CRM_OP_CLEAR_FAILCOUNT,
46037d
-                                CRM_OP_LRM_REFRESH, NULL)) {
46037d
+                                CRM_OP_CLEAR_FAILCOUNT, NULL)) {
46037d
         action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
46037d
 
46037d
     } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
46037d
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
46037d
index 93ba3fe..7db9d0e 100644
46037d
--- a/lib/pengine/common.c
46037d
+++ b/lib/pengine/common.c
46037d
@@ -384,8 +384,6 @@ text2task(const char *task)
46037d
         return no_action;
46037d
     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
46037d
         return no_action;
46037d
-    } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
46037d
-        return no_action;
46037d
     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
46037d
         return no_action;
46037d
     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
46037d
-- 
46037d
2.31.1
46037d
46037d
From 406fbc52ed652915887e78138f8f3c2eeaeabfb6 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Wed, 14 Sep 2022 16:46:15 -0500
46037d
Subject: [PATCH 10/24] API: libcrmcommon: deprecate CRM_OP_LRM_QUERY
46037d
46037d
This has been unused since at least Pacemaker 1.0.0, and since we don't support
46037d
rolling upgrades from anything that old, and Pacemaker Remote didn't exist
46037d
then, we can just drop support for it entirely.
46037d
---
46037d
 daemons/controld/controld_execd.c | 17 -----------------
46037d
 include/crm/crm.h                 |  1 -
46037d
 include/crm/crm_compat.h          |  5 ++++-
46037d
 3 files changed, 4 insertions(+), 19 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 719fab0..54e6818 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -1553,20 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
46037d
     lrmd_free_event(op);
46037d
 }
46037d
 
46037d
-static void
46037d
-handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
46037d
-{
46037d
-    xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
46037d
-    xmlNode *reply = create_reply(msg, data);
46037d
-
46037d
-    if (relay_message(reply, TRUE) == FALSE) {
46037d
-        crm_err("Unable to route reply");
46037d
-        crm_log_xml_err(reply, "reply");
46037d
-    }
46037d
-    free_xml(reply);
46037d
-    free_xml(data);
46037d
-}
46037d
-
46037d
 static void
46037d
 handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
46037d
                   const char *from_host, const char *user_name,
46037d
@@ -1768,9 +1754,6 @@ do_lrm_invoke(long long action,
46037d
          */
46037d
         crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
46037d
 
46037d
-    } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) {
46037d
-        handle_query_op(input->msg, lrm_state);
46037d
-
46037d
     // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
46037d
     } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) {
46037d
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
46037d
diff --git a/include/crm/crm.h b/include/crm/crm.h
46037d
index f2e536e..38915e3 100644
46037d
--- a/include/crm/crm.h
46037d
+++ b/include/crm/crm.h
46037d
@@ -147,7 +147,6 @@ extern char *crm_system_name;
46037d
 #  define CRM_OP_IPC_FWD		"ipc_fwd"
46037d
 #  define CRM_OP_INVOKE_LRM	"lrm_invoke"
46037d
 #  define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
46037d
-#  define CRM_OP_LRM_QUERY	"lrm_query"
46037d
 #  define CRM_OP_LRM_DELETE	"lrm_delete"
46037d
 #  define CRM_OP_LRM_FAIL		"lrm_fail"
46037d
 #  define CRM_OP_PROBED		"probe_complete"
46037d
diff --git a/include/crm/crm_compat.h b/include/crm/crm_compat.h
46037d
index 3b35a5e..8a4b368 100644
46037d
--- a/include/crm/crm_compat.h
46037d
+++ b/include/crm/crm_compat.h
46037d
@@ -1,5 +1,5 @@
46037d
 /*
46037d
- * Copyright 2004-2021 the Pacemaker project contributors
46037d
+ * Copyright 2004-2022 the Pacemaker project contributors
46037d
  *
46037d
  * The version control history for this file may have further details.
46037d
  *
46037d
@@ -31,6 +31,9 @@ extern "C" {
46037d
 //! \deprecated This defined constant will be removed in a future release
46037d
 #define MAX_IPC_DELAY 120
46037d
 
46037d
+//! \deprecated This defined constant will be removed in a future release
46037d
+#define CRM_OP_LRM_QUERY "lrm_query"
46037d
+
46037d
 //!@{
46037d
 //! \deprecated This macro will be removed in a future release
46037d
 
46037d
-- 
46037d
2.31.1
46037d
46037d
From 7c3d2f58d387d2ec0d5c5d340f8816f324e816bf Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Wed, 14 Sep 2022 16:49:48 -0500
46037d
Subject: [PATCH 11/24] Refactor: controller: drop do_lrm_query_internal()
46037d
46037d
Now that there's only one (short) caller, just move its contents there
46037d
---
46037d
 daemons/controld/controld_execd.c | 28 +++++++++++-----------------
46037d
 1 file changed, 11 insertions(+), 17 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 54e6818..99c9193 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -811,19 +811,26 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
46037d
     return FALSE;
46037d
 }
46037d
 
46037d
-static xmlNode *
46037d
-do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
46037d
+xmlNode *
46037d
+controld_query_executor_state(const char *node_name)
46037d
 {
46037d
     xmlNode *xml_state = NULL;
46037d
     xmlNode *xml_data = NULL;
46037d
     xmlNode *rsc_list = NULL;
46037d
     crm_node_t *peer = NULL;
46037d
+    lrm_state_t *lrm_state = lrm_state_find(node_name);
46037d
+
46037d
+    if (!lrm_state) {
46037d
+        crm_err("Could not find executor state for node %s", node_name);
46037d
+        return NULL;
46037d
+    }
46037d
 
46037d
     peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
46037d
     CRM_CHECK(peer != NULL, return NULL);
46037d
 
46037d
-    xml_state = create_node_state_update(peer, update_flags, NULL,
46037d
-                                         __func__);
46037d
+    xml_state = create_node_state_update(peer,
46037d
+                                         node_update_cluster|node_update_peer,
46037d
+                                         NULL, __func__);
46037d
     if (xml_state == NULL) {
46037d
         return NULL;
46037d
     }
46037d
@@ -840,19 +847,6 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
46037d
     return xml_state;
46037d
 }
46037d
 
46037d
-xmlNode *
46037d
-controld_query_executor_state(const char *node_name)
46037d
-{
46037d
-    lrm_state_t *lrm_state = lrm_state_find(node_name);
46037d
-
46037d
-    if (!lrm_state) {
46037d
-        crm_err("Could not find executor state for node %s", node_name);
46037d
-        return NULL;
46037d
-    }
46037d
-    return do_lrm_query_internal(lrm_state,
46037d
-                                 node_update_cluster|node_update_peer);
46037d
-}
46037d
-
46037d
 /*!
46037d
  * \internal
46037d
  * \brief Map standard Pacemaker return code to operation status and OCF code
46037d
-- 
46037d
2.31.1
46037d
46037d
From 5cab259417a06f64a607f99c478459093ed1b5ed Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Wed, 14 Sep 2022 15:48:44 -0500
46037d
Subject: [PATCH 12/24] Doc: controller: drop pointless comment
46037d
46037d
It's (likely?) impossible for a live cluster to have been doing rolling
46037d
upgrades since 2006.
46037d
---
46037d
 daemons/controld/controld_execd.c | 12 +-----------
46037d
 1 file changed, 1 insertion(+), 11 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 99c9193..53b1156 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -678,18 +678,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
46037d
 
46037d
     target_rc = rsc_op_expected_rc(op);
46037d
 
46037d
-    /* there is a small risk in formerly mixed clusters that it will
46037d
-     * be sub-optimal.
46037d
-     *
46037d
-     * however with our upgrade policy, the update we send should
46037d
-     * still be completely supported anyway
46037d
-     */
46037d
     caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
46037d
-    CRM_LOG_ASSERT(caller_version != NULL);
46037d
-
46037d
-    if(caller_version == NULL) {
46037d
-        caller_version = CRM_FEATURE_SET;
46037d
-    }
46037d
+    CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
46037d
 
46037d
     xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
46037d
                                       fsa_our_uname, src);
46037d
-- 
46037d
2.31.1
46037d
46037d
From b4541d7ecd9551674c4546415751a223ff3013ed Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Thu, 15 Sep 2022 11:24:28 -0500
46037d
Subject: [PATCH 13/24] Refactor: controller: move where reload actions get
46037d
 remapped
46037d
46037d
... from do_lrm_invoke() to do_lrm_rsc_op(), which will make planned changes
46037d
easier
46037d
---
46037d
 daemons/controld/controld_execd.c | 38 ++++++++++++++++---------------
46037d
 1 file changed, 20 insertions(+), 18 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 53b1156..c9f0cc7 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -43,7 +43,8 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us
46037d
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
46037d
                                        const char *rsc_id, const char *operation);
46037d
 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
-                          const char *operation, xmlNode *msg);
46037d
+                          const char *operation, xmlNode *msg,
46037d
+                          struct ra_metadata_s *md);
46037d
 
46037d
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
46037d
                                          int log_level);
46037d
@@ -1808,26 +1809,12 @@ do_lrm_invoke(long long action,
46037d
             do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
46037d
                           crm_rsc_delete, user_name);
46037d
 
46037d
-        } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
46037d
-                                    CRMD_ACTION_RELOAD_AGENT, NULL)) {
46037d
-            /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
46037d
-             * will schedule reload-agent actions only. In either case, we need
46037d
-             * to map that to whatever the resource agent actually supports.
46037d
-             * Default to the OCF 1.1 name.
46037d
-             */
46037d
+        } else {
46037d
             struct ra_metadata_s *md = NULL;
46037d
-            const char *reload_name = CRMD_ACTION_RELOAD_AGENT;
46037d
 
46037d
             md = controld_get_rsc_metadata(lrm_state, rsc,
46037d
                                            controld_metadata_from_cache);
46037d
-            if ((md != NULL)
46037d
-                && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
46037d
-                reload_name = CRMD_ACTION_RELOAD;
46037d
-            }
46037d
-            do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml);
46037d
-
46037d
-        } else {
46037d
-            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml);
46037d
+            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md);
46037d
         }
46037d
 
46037d
         lrmd_free_rsc_info(rsc);
46037d
@@ -2176,7 +2163,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
46037d
 
46037d
 static void
46037d
 do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
-              const char *operation, xmlNode *msg)
46037d
+              const char *operation, xmlNode *msg, struct ra_metadata_s *md)
46037d
 {
46037d
     int rc;
46037d
     int call_id = 0;
46037d
@@ -2198,6 +2185,21 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
         }
46037d
     }
46037d
 
46037d
+    if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
46037d
+                         CRMD_ACTION_RELOAD_AGENT, NULL)) {
46037d
+        /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
46037d
+         * will schedule reload-agent actions only. In either case, we need
46037d
+         * to map that to whatever the resource agent actually supports.
46037d
+         * Default to the OCF 1.1 name.
46037d
+         */
46037d
+        if ((md != NULL)
46037d
+            && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
46037d
+            operation = CRMD_ACTION_RELOAD;
46037d
+        } else {
46037d
+            operation = CRMD_ACTION_RELOAD_AGENT;
46037d
+        }
46037d
+    }
46037d
+
46037d
     op = construct_op(lrm_state, msg, rsc->id, operation);
46037d
     CRM_CHECK(op != NULL, return);
46037d
 
46037d
-- 
46037d
2.31.1
46037d
46037d
From a4f6e394a61712da750aabffca2b6dd02f0c5ae6 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Thu, 15 Sep 2022 15:12:06 -0500
46037d
Subject: [PATCH 14/24] Refactor: controller: drop operation argument to
46037d
 do_lrm_rsc_op()
46037d
46037d
It can be derived from the XML argument
46037d
---
46037d
 daemons/controld/controld_execd.c | 26 +++++++++++++-------------
46037d
 1 file changed, 13 insertions(+), 13 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index c9f0cc7..89a993b 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -43,8 +43,7 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us
46037d
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
46037d
                                        const char *rsc_id, const char *operation);
46037d
 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
-                          const char *operation, xmlNode *msg,
46037d
-                          struct ra_metadata_s *md);
46037d
+                          xmlNode *msg, struct ra_metadata_s *md);
46037d
 
46037d
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
46037d
                                          int log_level);
46037d
@@ -1814,7 +1813,7 @@ do_lrm_invoke(long long action,
46037d
 
46037d
             md = controld_get_rsc_metadata(lrm_state, rsc,
46037d
                                            controld_metadata_from_cache);
46037d
-            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md);
46037d
+            do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
46037d
         }
46037d
 
46037d
         lrmd_free_rsc_info(rsc);
46037d
@@ -2162,8 +2161,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
46037d
 }
46037d
 
46037d
 static void
46037d
-do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
-              const char *operation, xmlNode *msg, struct ra_metadata_s *md)
46037d
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
46037d
+              struct ra_metadata_s *md)
46037d
 {
46037d
     int rc;
46037d
     int call_id = 0;
46037d
@@ -2172,17 +2171,18 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
     lrmd_key_value_t *params = NULL;
46037d
     fsa_data_t *msg_data = NULL;
46037d
     const char *transition = NULL;
46037d
+    const char *operation = NULL;
46037d
     gboolean stop_recurring = FALSE;
46037d
     const char *nack_reason = NULL;
46037d
 
46037d
-    CRM_CHECK(rsc != NULL, return);
46037d
-    CRM_CHECK(operation != NULL, return);
46037d
+    CRM_CHECK((rsc != NULL) && (msg != NULL), return);
46037d
 
46037d
-    if (msg != NULL) {
46037d
-        transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
46037d
-        if (transition == NULL) {
46037d
-            crm_log_xml_err(msg, "Missing transition number");
46037d
-        }
46037d
+    operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
46037d
+    CRM_CHECK(!pcmk__str_empty(operation), return);
46037d
+
46037d
+    transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
46037d
+    if (pcmk__str_empty(transition)) {
46037d
+        crm_log_xml_err(msg, "Missing transition number");
46037d
     }
46037d
 
46037d
     if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
46037d
@@ -2241,7 +2241,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
     crm_notice("Requesting local execution of %s operation for %s on %s "
46037d
                CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
46037d
                crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name,
46037d
-               transition, rsc->id, operation, op->interval_ms);
46037d
+               (transition != NULL ? transition : ""), rsc->id, operation, op->interval_ms);
46037d
 
46037d
     if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)
46037d
         && pcmk__str_eq(operation, RSC_START, pcmk__str_casei)) {
46037d
-- 
46037d
2.31.1
46037d
46037d
From 486dbdf023f82a82a02207d8fb7921f8f2ac0588 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Thu, 15 Sep 2022 15:40:38 -0500
46037d
Subject: [PATCH 15/24] Low: controller: add failsafe for no executor
46037d
 connection
46037d
46037d
... in do_lrm_rsc_op(), to make planned changes easier
46037d
---
46037d
 daemons/controld/controld_execd.c | 11 +++++++++++
46037d
 1 file changed, 11 insertions(+)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 89a993b..8986b9b 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -2185,6 +2185,17 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
46037d
         crm_log_xml_err(msg, "Missing transition number");
46037d
     }
46037d
 
46037d
+    if (lrm_state == NULL) {
46037d
+        // This shouldn't be possible, but provide a failsafe just in case
46037d
+        crm_err("Cannot execute %s of %s: No executor connection "
46037d
+                CRM_XS " transition_key=%s",
46037d
+                operation, rsc->id, (transition != NULL ? transition : ""));
46037d
+        synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
46037d
+                                PCMK_OCF_UNKNOWN_ERROR,
46037d
+                                "No executor connection");
46037d
+        return;
46037d
+    }
46037d
+
46037d
     if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
46037d
                          CRMD_ACTION_RELOAD_AGENT, NULL)) {
46037d
         /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
46037d
-- 
46037d
2.31.1
46037d
46037d
From afd53bba7dfb5109d844318dff0f82e4687d9e32 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Thu, 15 Sep 2022 12:04:31 -0500
46037d
Subject: [PATCH 16/24] Log: controller: improve messages when metadata cache
46037d
 update fails
46037d
46037d
Previously, metadata_cache_update() or ra_param_from_xml() would log an error,
46037d
then controld_get_rsc_metadata() (but not the other caller,
46037d
process_lrm_event()) would log another warning with the agent info.
46037d
46037d
Combine these into a single message always logged by metadata_cache_update(),
46037d
which also has been renamed to controld_cache_metadata().
46037d
---
46037d
 daemons/controld/controld_execd.c    |  2 +-
46037d
 daemons/controld/controld_metadata.c | 27 ++++++++++++---------------
46037d
 daemons/controld/controld_metadata.h |  6 +++---
46037d
 3 files changed, 16 insertions(+), 19 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 8986b9b..fe16c96 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -2858,7 +2858,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
46037d
         } else if (rsc && (op->rc == PCMK_OCF_OK)) {
46037d
             char *metadata = unescape_newlines(op->output);
46037d
 
46037d
-            metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
46037d
+            controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
46037d
             free(metadata);
46037d
         }
46037d
     }
46037d
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
46037d
index 8c6f195..91a6a10 100644
46037d
--- a/daemons/controld/controld_metadata.c
46037d
+++ b/daemons/controld/controld_metadata.c
46037d
@@ -149,13 +149,11 @@ ra_param_from_xml(xmlNode *param_xml)
46037d
 
46037d
     p = calloc(1, sizeof(struct ra_param_s));
46037d
     if (p == NULL) {
46037d
-        crm_crit("Could not allocate memory for resource metadata");
46037d
         return NULL;
46037d
     }
46037d
 
46037d
     p->rap_name = strdup(param_name);
46037d
     if (p->rap_name == NULL) {
46037d
-        crm_crit("Could not allocate memory for resource metadata");
46037d
         free(p);
46037d
         return NULL;
46037d
     }
46037d
@@ -196,10 +194,11 @@ log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version)
46037d
 }
46037d
 
46037d
 struct ra_metadata_s *
46037d
-metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
46037d
-                      const char *metadata_str)
46037d
+controld_cache_metadata(GHashTable *mdc, lrmd_rsc_info_t *rsc,
46037d
+                        const char *metadata_str)
46037d
 {
46037d
     char *key = NULL;
46037d
+    const char *reason = NULL;
46037d
     xmlNode *metadata = NULL;
46037d
     xmlNode *match = NULL;
46037d
     struct ra_metadata_s *md = NULL;
46037d
@@ -210,20 +209,19 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
46037d
 
46037d
     key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
46037d
     if (!key) {
46037d
-        crm_crit("Could not allocate memory for resource metadata");
46037d
+        reason = "Invalid resource agent standard or type";
46037d
         goto err;
46037d
     }
46037d
 
46037d
     metadata = string2xml(metadata_str);
46037d
     if (!metadata) {
46037d
-        crm_err("Metadata for %s:%s:%s is not valid XML",
46037d
-                rsc->standard, rsc->provider, rsc->type);
46037d
+        reason = "Metadata is not valid XML";
46037d
         goto err;
46037d
     }
46037d
 
46037d
     md = calloc(1, sizeof(struct ra_metadata_s));
46037d
     if (md == NULL) {
46037d
-        crm_crit("Could not allocate memory for resource metadata");
46037d
+        reason = "Could not allocate memory";
46037d
         goto err;
46037d
     }
46037d
 
46037d
@@ -281,6 +279,7 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
46037d
             struct ra_param_s *p = ra_param_from_xml(match);
46037d
 
46037d
             if (p == NULL) {
46037d
+                reason = "Could not allocate memory";
46037d
                 goto err;
46037d
             }
46037d
             if (pcmk_is_set(p->rap_flags, ra_param_private)) {
46037d
@@ -311,6 +310,9 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
46037d
     return md;
46037d
 
46037d
 err:
46037d
+    crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s",
46037d
+             rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
46037d
+             (rsc->provider != NULL ? rsc->provider : ""), rsc->type, reason);
46037d
     free(key);
46037d
     free_xml(metadata);
46037d
     metadata_free(md);
46037d
@@ -377,13 +379,8 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
         return NULL;
46037d
     }
46037d
 
46037d
-    metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
46037d
-                                     metadata_str);
46037d
+    metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc,
46037d
+                                       metadata_str);
46037d
     free(metadata_str);
46037d
-    if (metadata == NULL) {
46037d
-        crm_warn("Failed to update metadata for %s (%s%s%s:%s)",
46037d
-                 rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
46037d
-                 ((rsc->provider == NULL)? "" : rsc->provider), rsc->type);
46037d
-    }
46037d
     return metadata;
46037d
 }
46037d
diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h
46037d
index 7354f94..52d3336 100644
46037d
--- a/daemons/controld/controld_metadata.h
46037d
+++ b/daemons/controld/controld_metadata.h
46037d
@@ -73,9 +73,9 @@ void metadata_cache_free(GHashTable *mdc);
46037d
 void metadata_cache_reset(GHashTable *mdc);
46037d
 void metadata_cache_fini(void);
46037d
 
46037d
-struct ra_metadata_s *metadata_cache_update(GHashTable *mdc,
46037d
-                                            lrmd_rsc_info_t *rsc,
46037d
-                                            const char *metadata_str);
46037d
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
46037d
+                                              lrmd_rsc_info_t *rsc,
46037d
+                                              const char *metadata_str);
46037d
 struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
46037d
                                                 lrmd_rsc_info_t *rsc,
46037d
                                                 uint32_t source);
46037d
-- 
46037d
2.31.1
46037d
46037d
From caeed447d0d8a980d431efd70e5b6f9c91ffac7f Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Thu, 15 Sep 2022 13:33:36 -0500
46037d
Subject: [PATCH 17/24] Fix: controller: pre-load agent metadata asynchronously
46037d
46037d
The controller needs resource agent metadata to record digests with pending and
46037d
completed resource actions.
46037d
46037d
Previously, metadata was collected synchronously when needed. This caused
46037d
several problems, two of which are fixed here for most actions: synchronous
46037d
execution blocks the controller from doing anything else (and if the agent's
46037d
metadata action tries to contact the controller, that blocks everything until
46037d
the action times out), and the metadata action ate into the real action's
46037d
timeout.
46037d
46037d
Now, if we're likely to need metadata for an action, attempt to get it
46037d
asynchronously before executing that action, so the metadata is available in
46037d
cache when needed.
46037d
46037d
This is not a complete solution, as there are other code paths that might
46037d
require metadata and still lead to synchronous execution, but it handles the
46037d
most important cases.
46037d
46037d
Fixes T554
46037d
---
46037d
 daemons/controld/controld_execd.c    | 105 +++++++++++++++++++++++----
46037d
 daemons/controld/controld_metadata.c |  22 +++---
46037d
 2 files changed, 102 insertions(+), 25 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index fe16c96..c56fdf5 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -670,7 +670,6 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
46037d
     struct ra_metadata_s *metadata = NULL;
46037d
     const char *caller_version = NULL;
46037d
     lrm_state_t *lrm_state = NULL;
46037d
-    uint32_t metadata_source = controld_metadata_from_agent;
46037d
 
46037d
     if (op == NULL) {
46037d
         return FALSE;
46037d
@@ -703,19 +702,14 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
46037d
         return TRUE;
46037d
     }
46037d
 
46037d
-    /* Getting meta-data from cache is OK unless this is a successful start
46037d
-     * action -- always refresh from the agent for those, in case the
46037d
-     * resource agent was updated.
46037d
+    /* Ideally the metadata is cached, and the agent is just a fallback.
46037d
      *
46037d
-     * @TODO Only refresh the meta-data after starts if the agent actually
46037d
-     * changed (using something like inotify, or a hash or modification time of
46037d
-     * the agent executable).
46037d
+     * @TODO Go through all callers and ensure they get metadata asynchronously
46037d
+     * first.
46037d
      */
46037d
-    if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc)
46037d
-        || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) {
46037d
-        metadata_source |= controld_metadata_from_cache;
46037d
-    }
46037d
-    metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source);
46037d
+    metadata = controld_get_rsc_metadata(lrm_state, rsc,
46037d
+                                         controld_metadata_from_agent
46037d
+                                         |controld_metadata_from_cache);
46037d
     if (metadata == NULL) {
46037d
         return TRUE;
46037d
     }
46037d
@@ -1673,6 +1667,56 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
46037d
                     user_name, input, unregister);
46037d
 }
46037d
 
46037d
+// User data for asynchronous metadata execution
46037d
+struct metadata_cb_data {
46037d
+    lrmd_rsc_info_t *rsc;   // Copy of resource information
46037d
+    xmlNode *input_xml;     // Copy of FSA input XML
46037d
+};
46037d
+
46037d
+static struct metadata_cb_data *
46037d
+new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
46037d
+{
46037d
+    struct metadata_cb_data *data = NULL;
46037d
+
46037d
+    data = calloc(1, sizeof(struct metadata_cb_data));
46037d
+    CRM_ASSERT(data != NULL);
46037d
+    data->input_xml = copy_xml(input_xml);
46037d
+    data->rsc = lrmd_copy_rsc_info(rsc);
46037d
+    return data;
46037d
+}
46037d
+
46037d
+static void
46037d
+free_metadata_cb_data(struct metadata_cb_data *data)
46037d
+{
46037d
+    lrmd_free_rsc_info(data->rsc);
46037d
+    free_xml(data->input_xml);
46037d
+    free(data);
46037d
+}
46037d
+
46037d
+/*!
46037d
+ * \internal
46037d
+ * \brief Execute an action after metadata has been retrieved
46037d
+ *
46037d
+ * \param[in] pid        Ignored
46037d
+ * \param[in] result     Result of metadata action
46037d
+ * \param[in] user_data  Metadata callback data
46037d
+ */
46037d
+static void
46037d
+metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
46037d
+{
46037d
+    struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
46037d
+
46037d
+    struct ra_metadata_s *md = NULL;
46037d
+    lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
46037d
+
46037d
+    if ((lrm_state != NULL) && pcmk__result_ok(result)) {
46037d
+        md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
46037d
+                                     result->action_stdout);
46037d
+    }
46037d
+    do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
46037d
+    free_metadata_cb_data(data);
46037d
+}
46037d
+
46037d
 /*	 A_LRM_INVOKE	*/
46037d
 void
46037d
 do_lrm_invoke(long long action,
46037d
@@ -1811,9 +1855,40 @@ do_lrm_invoke(long long action,
46037d
         } else {
46037d
             struct ra_metadata_s *md = NULL;
46037d
 
46037d
-            md = controld_get_rsc_metadata(lrm_state, rsc,
46037d
-                                           controld_metadata_from_cache);
46037d
-            do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
46037d
+            /* Getting metadata from cache is OK except for start actions --
46037d
+             * always refresh from the agent for those, in case the resource
46037d
+             * agent was updated.
46037d
+             *
46037d
+             * @TODO Only refresh metadata for starts if the agent actually
46037d
+             * changed (using something like inotify, or a hash or modification
46037d
+             * time of the agent executable).
46037d
+             */
46037d
+            if (strcmp(operation, CRMD_ACTION_START) != 0) {
46037d
+                md = controld_get_rsc_metadata(lrm_state, rsc,
46037d
+                                               controld_metadata_from_cache);
46037d
+            }
46037d
+
46037d
+            if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
46037d
+                                                      operation)) {
46037d
+                /* Most likely, we'll need the agent metadata to record the
46037d
+                 * pending operation and the operation result. Get it now rather
46037d
+                 * than wait until then, so the metadata action doesn't eat into
46037d
+                 * the real action's timeout.
46037d
+                 *
46037d
+                 * @TODO Metadata is retrieved via direct execution of the
46037d
+                 * agent, which has a couple of related issues: the executor
46037d
+                 * should execute agents, not the controller; and metadata for
46037d
+                 * Pacemaker Remote nodes should be collected on those nodes,
46037d
+                 * not locally.
46037d
+                 */
46037d
+                struct metadata_cb_data *data = NULL;
46037d
+
46037d
+                data = new_metadata_cb_data(rsc, input->xml);
46037d
+                (void) lrmd__metadata_async(rsc, metadata_complete,
46037d
+                                            (void *) data);
46037d
+            } else {
46037d
+                do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
46037d
+            }
46037d
         }
46037d
 
46037d
         lrmd_free_rsc_info(rsc);
46037d
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
46037d
index 91a6a10..a954ebd 100644
46037d
--- a/daemons/controld/controld_metadata.c
46037d
+++ b/daemons/controld/controld_metadata.c
46037d
@@ -356,17 +356,19 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
         return NULL;
46037d
     }
46037d
 
46037d
-    /* For now, we always collect resource agent meta-data via a local,
46037d
-     * synchronous, direct execution of the agent. This has multiple issues:
46037d
-     * the executor should execute agents, not the controller; meta-data for
46037d
-     * Pacemaker Remote nodes should be collected on those nodes, not
46037d
-     * locally; and the meta-data call shouldn't eat into the timeout of the
46037d
-     * real action being performed.
46037d
+    /* For most actions, metadata was cached asynchronously before action
46037d
+     * execution (via metadata_complete()).
46037d
      *
46037d
-     * These issues are planned to be addressed by having the scheduler
46037d
-     * schedule a meta-data cache check at the beginning of each transition.
46037d
-     * Once that is working, this block will only be a fallback in case the
46037d
-     * initial collection fails.
46037d
+     * However if that failed, and for other actions, retrieve the metadata now
46037d
+     * via a local, synchronous, direct execution of the agent.
46037d
+     *
46037d
+     * This has multiple issues, which is why this is just a fallback: the
46037d
+     * executor should execute agents, not the controller; metadata for
46037d
+     * Pacemaker Remote nodes should be collected on those nodes, not locally;
46037d
+     * the metadata call shouldn't eat into the timeout of the real action being
46037d
+     * performed; and the synchronous call blocks the controller (which also
46037d
+     * means that if the metadata action tries to contact the controller,
46037d
+     * everything will hang until the timeout).
46037d
      */
46037d
     rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
46037d
                                 rsc->type, &metadata_str, 0);
46037d
-- 
46037d
2.31.1
46037d
46037d
From fddf663d5285740771145e83c41f33c0bfb86dfb Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Mon, 19 Sep 2022 15:19:06 -0500
46037d
Subject: [PATCH 18/24] Low: libstonithd: return CRM_EX_NOSUCH for bad agent
46037d
 namespace
46037d
46037d
Callers can't rely on a particular exit code scheme at this point,
46037d
but returning a more specific exit code doesn't hurt.
46037d
---
46037d
 lib/fencing/st_client.c | 2 +-
46037d
 1 file changed, 1 insertion(+), 1 deletion(-)
46037d
46037d
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
46037d
index 91075bd..d41b066 100644
46037d
--- a/lib/fencing/st_client.c
46037d
+++ b/lib/fencing/st_client.c
46037d
@@ -2451,7 +2451,7 @@ stonith__metadata_async(const char *agent, int timeout_sec,
46037d
         default:
46037d
             {
46037d
                 pcmk__action_result_t result = {
46037d
-                    .exit_status = CRM_EX_ERROR,
46037d
+                    .exit_status = CRM_EX_NOSUCH,
46037d
                     .execution_status = PCMK_EXEC_ERROR_HARD,
46037d
                     .exit_reason = crm_strdup_printf("No such agent '%s'",
46037d
                                                      agent),
46037d
-- 
46037d
2.31.1
46037d
46037d
From 2de926f5b2b5dbf28f994bc35477d59ce46d5ab1 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Mon, 19 Sep 2022 15:23:43 -0500
46037d
Subject: [PATCH 19/24] Low: liblrmd: consider invalid agent specification a
46037d
 fatal error
46037d
46037d
---
46037d
 lib/lrmd/lrmd_client.c | 3 ++-
46037d
 1 file changed, 2 insertions(+), 1 deletion(-)
46037d
46037d
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
46037d
index 4b16bf0..d691dce 100644
46037d
--- a/lib/lrmd/lrmd_client.c
46037d
+++ b/lib/lrmd/lrmd_client.c
46037d
@@ -2402,7 +2402,8 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc,
46037d
     CRM_CHECK(callback != NULL, return EINVAL);
46037d
 
46037d
     if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
46037d
-        pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR,
46037d
+        pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED,
46037d
+                         PCMK_EXEC_ERROR_FATAL,
46037d
                          "Invalid resource specification");
46037d
         callback(0, &result, user_data);
46037d
         pcmk__reset_result(&result);
46037d
-- 
46037d
2.31.1
46037d
46037d
From 2d526dae9dbfc6f8658ff96f5f6d58ee09ea879c Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Mon, 19 Sep 2022 15:25:12 -0500
46037d
Subject: [PATCH 20/24] Low: liblrmd: use resource ID for metadata actions when
46037d
 available
46037d
46037d
---
46037d
 lib/lrmd/lrmd_client.c | 10 +++++-----
46037d
 1 file changed, 5 insertions(+), 5 deletions(-)
46037d
46037d
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
46037d
index d691dce..570a2b8 100644
46037d
--- a/lib/lrmd/lrmd_client.c
46037d
+++ b/lib/lrmd/lrmd_client.c
46037d
@@ -2416,11 +2416,11 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc,
46037d
                                        callback, user_data);
46037d
     }
46037d
 
46037d
-    action = services__create_resource_action(rsc->type, rsc->standard,
46037d
-                                              rsc->provider, rsc->type,
46037d
-                                              CRMD_ACTION_METADATA, 0,
46037d
-                                              CRMD_METADATA_CALL_TIMEOUT, NULL,
46037d
-                                              0);
46037d
+    action = services__create_resource_action((rsc->id != NULL ? rsc->id : rsc->type),
46037d
+                                              rsc->standard, rsc->provider,
46037d
+                                              rsc->type, CRMD_ACTION_METADATA,
46037d
+                                              0, CRMD_METADATA_CALL_TIMEOUT,
46037d
+                                              NULL, 0);
46037d
     if (action == NULL) {
46037d
         pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
46037d
                          "Out of memory");
46037d
-- 
46037d
2.31.1
46037d
46037d
From 3d632be58dca13293e4ae974da5dfe2838fcdf12 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Mon, 19 Sep 2022 15:27:11 -0500
46037d
Subject: [PATCH 21/24] Refactor: controller: executor query can assume local
46037d
 node
46037d
46037d
---
46037d
 daemons/controld/controld_execd.c       | 6 +++---
46037d
 daemons/controld/controld_fsa.h         | 4 ++--
46037d
 daemons/controld/controld_join_client.c | 2 +-
46037d
 daemons/controld/controld_join_dc.c     | 2 +-
46037d
 4 files changed, 7 insertions(+), 7 deletions(-)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index c56fdf5..039b194 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -796,16 +796,16 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
46037d
 }
46037d
 
46037d
 xmlNode *
46037d
-controld_query_executor_state(const char *node_name)
46037d
+controld_query_executor_state(void)
46037d
 {
46037d
     xmlNode *xml_state = NULL;
46037d
     xmlNode *xml_data = NULL;
46037d
     xmlNode *rsc_list = NULL;
46037d
     crm_node_t *peer = NULL;
46037d
-    lrm_state_t *lrm_state = lrm_state_find(node_name);
46037d
+    lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
46037d
 
46037d
     if (!lrm_state) {
46037d
-        crm_err("Could not find executor state for node %s", node_name);
46037d
+        crm_err("Could not find executor state for node %s", fsa_our_uname);
46037d
         return NULL;
46037d
     }
46037d
 
46037d
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
46037d
index 296232f..d137310 100644
46037d
--- a/daemons/controld/controld_fsa.h
46037d
+++ b/daemons/controld/controld_fsa.h
46037d
@@ -1,5 +1,5 @@
46037d
 /*
46037d
- * Copyright 2004-2021 the Pacemaker project contributors
46037d
+ * Copyright 2004-2022 the Pacemaker project contributors
46037d
  *
46037d
  * The version control history for this file may have further details.
46037d
  *
46037d
@@ -518,7 +518,7 @@ extern gboolean ever_had_quorum;
46037d
 // These should be moved elsewhere
46037d
 void do_update_cib_nodes(gboolean overwrite, const char *caller);
46037d
 int crmd_cib_smart_opt(void);
46037d
-xmlNode *controld_query_executor_state(const char *node_name);
46037d
+xmlNode *controld_query_executor_state(void);
46037d
 
46037d
 const char *fsa_input2string(enum crmd_fsa_input input);
46037d
 const char *fsa_state2string(enum crmd_fsa_state state);
46037d
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
46037d
index 6485856..bfec430 100644
46037d
--- a/daemons/controld/controld_join_client.c
46037d
+++ b/daemons/controld/controld_join_client.c
46037d
@@ -268,7 +268,7 @@ do_cl_join_finalize_respond(long long action,
46037d
     update_dc_expected(input->msg);
46037d
 
46037d
     /* send our status section to the DC */
46037d
-    tmp1 = controld_query_executor_state(fsa_our_uname);
46037d
+    tmp1 = controld_query_executor_state();
46037d
     if (tmp1 != NULL) {
46037d
         xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
46037d
                                         CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
46037d
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
46037d
index 9386182..9a8ea3e 100644
46037d
--- a/daemons/controld/controld_join_dc.c
46037d
+++ b/daemons/controld/controld_join_dc.c
46037d
@@ -591,7 +591,7 @@ do_dc_join_ack(long long action,
46037d
     }
46037d
     controld_delete_node_state(join_from, section, cib_scope_local);
46037d
     if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) {
46037d
-        xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname);
46037d
+        xmlNode *now_dc_lrmd_state = controld_query_executor_state();
46037d
 
46037d
         if (now_dc_lrmd_state != NULL) {
46037d
             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
46037d
-- 
46037d
2.31.1
46037d
46037d
From d852ec335bd5b518a3f06c7f1b597370094311ae Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Tue, 20 Sep 2022 10:18:48 -0500
46037d
Subject: [PATCH 22/24] Log: controller: add messages when getting agent
46037d
 metadata
46037d
46037d
---
46037d
 daemons/controld/controld_execd.c    |  5 +++++
46037d
 daemons/controld/controld_metadata.c | 10 ++++++++++
46037d
 2 files changed, 15 insertions(+)
46037d
46037d
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
46037d
index 039b194..f02da82 100644
46037d
--- a/daemons/controld/controld_execd.c
46037d
+++ b/daemons/controld/controld_execd.c
46037d
@@ -1884,6 +1884,11 @@ do_lrm_invoke(long long action,
46037d
                 struct metadata_cb_data *data = NULL;
46037d
 
46037d
                 data = new_metadata_cb_data(rsc, input->xml);
46037d
+                crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
46037d
+                         rsc->id, rsc->standard,
46037d
+                         ((rsc->provider == NULL)? "" : ":"),
46037d
+                         ((rsc->provider == NULL)? "" : rsc->provider),
46037d
+                         rsc->type);
46037d
                 (void) lrmd__metadata_async(rsc, metadata_complete,
46037d
                                             (void *) data);
46037d
             } else {
46037d
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
46037d
index a954ebd..39b43b0 100644
46037d
--- a/daemons/controld/controld_metadata.c
46037d
+++ b/daemons/controld/controld_metadata.c
46037d
@@ -348,6 +348,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
             free(key);
46037d
         }
46037d
         if (metadata != NULL) {
46037d
+            crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache",
46037d
+                      rsc->id, rsc->standard,
46037d
+                      ((rsc->provider == NULL)? "" : ":"),
46037d
+                      ((rsc->provider == NULL)? "" : rsc->provider),
46037d
+                      rsc->type);
46037d
             return metadata;
46037d
         }
46037d
     }
46037d
@@ -370,6 +375,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
46037d
      * means that if the metadata action tries to contact the controller,
46037d
      * everything will hang until the timeout).
46037d
      */
46037d
+    crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously",
46037d
+              rsc->id, rsc->standard,
46037d
+              ((rsc->provider == NULL)? "" : ":"),
46037d
+              ((rsc->provider == NULL)? "" : rsc->provider),
46037d
+              rsc->type);
46037d
     rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
46037d
                                 rsc->type, &metadata_str, 0);
46037d
     if (rc != pcmk_ok) {
46037d
-- 
46037d
2.31.1
46037d
46037d
From 5aec773a20e1ded971a4082358e266353615f196 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Wed, 14 Sep 2022 14:36:44 -0500
46037d
Subject: [PATCH 23/24] Test: cts-lab: allow any whitespace in "Recover"
46037d
 messages
46037d
46037d
This seems to have always been multiple spaces; not sure what happened.
46037d
---
46037d
 cts/lab/CTStests.py | 12 ++++++------
46037d
 cts/lab/patterns.py |  4 ++--
46037d
 2 files changed, 8 insertions(+), 8 deletions(-)
46037d
46037d
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
46037d
index 5535177..8b56758 100644
46037d
--- a/cts/lab/CTStests.py
46037d
+++ b/cts/lab/CTStests.py
46037d
@@ -1,7 +1,7 @@
46037d
 """ Test-specific classes for Pacemaker's Cluster Test Suite (CTS)
46037d
 """
46037d
 
46037d
-__copyright__ = "Copyright 2000-2021 the Pacemaker project contributors"
46037d
+__copyright__ = "Copyright 2000-2022 the Pacemaker project contributors"
46037d
 __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
46037d
 
46037d
 #
46037d
@@ -1225,7 +1225,7 @@ class MaintenanceMode(CTSTest):
46037d
         '''Return list of errors which should be ignored'''
46037d
         return [
46037d
             r"Updating failcount for %s" % self.rid,
46037d
-            r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid,
46037d
+            r"schedulerd.*: Recover\s+%s\s+\(.*\)" % self.rid,
46037d
             r"Unknown operation: fail",
46037d
             self.templates["Pat:RscOpOK"] % (self.action, self.rid),
46037d
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
46037d
@@ -1324,7 +1324,7 @@ class ResourceRecover(CTSTest):
46037d
         '''Return list of errors which should be ignored'''
46037d
         return [
46037d
             r"Updating failcount for %s" % self.rid,
46037d
-            r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
46037d
+            r"schedulerd.*: Recover\s+(%s|%s)\s+\(.*\)" % (self.rid, self.rid_alt),
46037d
             r"Unknown operation: fail",
46037d
             self.templates["Pat:RscOpOK"] % (self.action, self.rid),
46037d
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
46037d
@@ -2559,7 +2559,7 @@ class RemoteLXC(CTSTest):
46037d
         '''Return list of errors which should be ignored'''
46037d
         return [
46037d
             r"Updating failcount for ping",
46037d
-            r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
46037d
+            r"schedulerd.*: Recover\s+(ping|lxc-ms|container)\s+\(.*\)",
46037d
             # The orphaned lxc-ms resource causes an expected transition error
46037d
             # that is a result of the scheduler not having knowledge that the
46037d
             # promotable resource used to be a clone. As a result, it looks like that 
46037d
@@ -3054,7 +3054,7 @@ class RemoteStonithd(RemoteDriver):
46037d
             r"Software caused connection abort",
46037d
             r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
46037d
             r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
46037d
-            r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
46037d
+            r"schedulerd.*:\s+Recover\s+remote-.*\s+\(.*\)",
46037d
             r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
46037d
         ]
46037d
 
46037d
@@ -3120,7 +3120,7 @@ class RemoteRscFailure(RemoteDriver):
46037d
 
46037d
     def errorstoignore(self):
46037d
         ignore_pats = [
46037d
-            r"schedulerd.*: Recover remote-rsc\s*\(.*\)",
46037d
+            r"schedulerd.*: Recover\s+remote-rsc\s+\(.*\)",
46037d
             r"Dummy.*: No process state file found",
46037d
         ]
46037d
 
46037d
diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
46037d
index 90cac73..6e718f7 100644
46037d
--- a/cts/lab/patterns.py
46037d
+++ b/cts/lab/patterns.py
46037d
@@ -66,7 +66,7 @@ class BasePatterns(object):
46037d
 
46037d
             "Pat:Fencing_start"   : r"Requesting peer fencing .* targeting %s",
46037d
             "Pat:Fencing_ok"      : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK",
46037d
-            "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s",
46037d
+            "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover\s+%s",
46037d
             "Pat:Fencing_active"  : r"stonith resource .* is active on 2 nodes (attempting recovery)",
46037d
             "Pat:Fencing_probe"   : r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
46037d
 
46037d
@@ -180,7 +180,7 @@ class crm_corosync(BasePatterns):
46037d
             r"Parameters to .* action changed:",
46037d
             r"Parameters to .* changed",
46037d
             r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)",
46037d
-            r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)",
46037d
+            r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)",
46037d
             r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
46037d
             r"Peer is not part of our cluster",
46037d
             r"We appear to be in an election loop",
46037d
-- 
46037d
2.31.1
46037d
46037d
From 338cf55d19cb4ebebedf092dd0a5969ac2eda295 Mon Sep 17 00:00:00 2001
46037d
From: Ken Gaillot <kgaillot@redhat.com>
46037d
Date: Mon, 19 Sep 2022 15:55:42 -0500
46037d
Subject: [PATCH 24/24] Test: cts-lab: match parentheses correctly
46037d
46037d
---
46037d
 cts/lab/patterns.py | 3 ++-
46037d
 1 file changed, 2 insertions(+), 1 deletion(-)
46037d
46037d
diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
46037d
index 6e718f7..856fffb 100644
46037d
--- a/cts/lab/patterns.py
46037d
+++ b/cts/lab/patterns.py
46037d
@@ -271,6 +271,7 @@ class crm_corosync(BasePatterns):
46037d
         ]
46037d
         self.components["pacemaker-based-ignore"] = [
46037d
             r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
46037d
+            r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
46037d
             # This is overbroad, but we don't have a way to say that only
46037d
             # certain transition errors are acceptable (if the fencer respawns,
46037d
             # fence devices may appear multiply active). We have to rely on
46037d
@@ -328,7 +329,7 @@ class crm_corosync(BasePatterns):
46037d
             r"crit:.*Fencing daemon connection failed",
46037d
             r"error:.*Fencer connection failed \(will retry\)",
46037d
             r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations",
46037d
-            r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error",
46037d
+            r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
46037d
             # This is overbroad, but we don't have a way to say that only
46037d
             # certain transition errors are acceptable (if the fencer respawns,
46037d
             # fence devices may appear multiply active). We have to rely on
46037d
-- 
46037d
2.31.1
46037d