Blame SOURCES/009-improve-pacemaker_remote-handling.patch

4c8e44
From 28566d6832274c59f27bb7b2f1f54420a3f3d822 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Thu, 9 May 2019 20:26:08 -0500
4c8e44
Subject: [PATCH 01/13] Refactor: libpe_status: functionize unfencing digest
4c8e44
 code more
4c8e44
4c8e44
... for readability, reusability, and avoiding unnecessary function calls or
4c8e44
memory allocation.
4c8e44
---
4c8e44
 lib/pengine/utils.c | 159 ++++++++++++++++++++++++++++++++++++++--------------
4c8e44
 1 file changed, 118 insertions(+), 41 deletions(-)
4c8e44
4c8e44
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
4c8e44
index 2f4dc1e..f80f8d4 100644
4c8e44
--- a/lib/pengine/utils.c
4c8e44
+++ b/lib/pengine/utils.c
4c8e44
@@ -2080,57 +2080,134 @@ rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
4c8e44
     return data;
4c8e44
 }
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Create an unfencing summary for use in special node attribute
4c8e44
+ *
4c8e44
+ * Create a string combining a fence device's resource ID, agent type, and
4c8e44
+ * parameter digest (whether for all parameters or just non-private parameters).
4c8e44
+ * This can be stored in a special node attribute, allowing us to detect changes
4c8e44
+ * in either the agent type or parameters, to know whether unfencing must be
4c8e44
+ * redone or can be safely skipped when the device's history is cleaned.
4c8e44
+ *
4c8e44
+ * \param[in] rsc_id        Fence device resource ID
4c8e44
+ * \param[in] agent_type    Fence device agent
4c8e44
+ * \param[in] param_digest  Fence device parameter digest
4c8e44
+ *
4c8e44
+ * \return Newly allocated string with unfencing digest
4c8e44
+ * \note The caller is responsible for freeing the result.
4c8e44
+ */
4c8e44
+static inline char *
4c8e44
+create_unfencing_summary(const char *rsc_id, const char *agent_type,
4c8e44
+                         const char *param_digest)
4c8e44
+{
4c8e44
+    return crm_strdup_printf("%s:%s:%s", rsc_id, agent_type, param_digest);
4c8e44
+}
4c8e44
+
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Check whether a node can skip unfencing
4c8e44
+ *
4c8e44
+ * Check whether a fence device's current definition matches a node's
4c8e44
+ * stored summary of when it was last unfenced by the device.
4c8e44
+ *
4c8e44
+ * \param[in] rsc_id        Fence device's resource ID
4c8e44
+ * \param[in] agent         Fence device's agent type
4c8e44
+ * \param[in] digest_calc   Fence device's current parameter digest
4c8e44
+ * \param[in] node_summary  Value of node's special unfencing node attribute
4c8e44
+ *                          (a comma-separated list of unfencing summaries for
4c8e44
+ *                          all devices that have unfenced this node)
4c8e44
+ *
4c8e44
+ * \return TRUE if digest matches, FALSE otherwise
4c8e44
+ */
4c8e44
+static bool
4c8e44
+unfencing_digest_matches(const char *rsc_id, const char *agent,
4c8e44
+                         const char *digest_calc, const char *node_summary)
4c8e44
+{
4c8e44
+    bool matches = FALSE;
4c8e44
+
4c8e44
+    if (rsc_id && agent && digest_calc && node_summary) {
4c8e44
+        char *search_secure = create_unfencing_summary(rsc_id, agent,
4c8e44
+                                                       digest_calc);
4c8e44
+
4c8e44
+        /* The digest was calculated including the device ID and agent,
4c8e44
+         * so there is no risk of collision using strstr().
4c8e44
+         */
4c8e44
+        matches = (strstr(node_summary, search_secure) != NULL);
4c8e44
+        crm_trace("Calculated unfencing digest '%s' %sfound in '%s'",
4c8e44
+                  search_secure, matches? "" : "not ", node_summary);
4c8e44
+        free(search_secure);
4c8e44
+    }
4c8e44
+    return matches;
4c8e44
+}
4c8e44
+
4c8e44
+/* Magic string to use as action name for digest cache entries used for
4c8e44
+ * unfencing checks. This is not a real action name (such as "on"), so
4c8e44
+ * check_action_definition() won't confuse these entries with real actions.
4c8e44
+ */
4c8e44
 #define STONITH_DIGEST_TASK "stonith-on"
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Calculate fence device digests and digest comparison result
4c8e44
+ *
4c8e44
+ * \param[in] rsc       Fence device resource
4c8e44
+ * \param[in] agent     Fence device's agent type
4c8e44
+ * \param[in] node      Node with digest cache to use
4c8e44
+ * \param[in] data_set  Cluster working set
4c8e44
+ *
4c8e44
+ * \return Node's digest cache entry
4c8e44
+ */
4c8e44
 static op_digest_cache_t *
4c8e44
-fencing_action_digest_cmp(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
4c8e44
+fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent,
4c8e44
+                          pe_node_t *node, pe_working_set_t *data_set)
4c8e44
 {
4c8e44
-    char *key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0);
4c8e44
-    op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key, node, NULL, data_set);
4c8e44
+    const char *node_summary = NULL;
4c8e44
 
4c8e44
-    const char *digest_all = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
4c8e44
-    const char *digest_secure = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE);
4c8e44
+    // Calculate device's current parameter digests
4c8e44
+    char *key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0);
4c8e44
+    op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key,
4c8e44
+                                                node, NULL, data_set);
4c8e44
 
4c8e44
-    /* No 'reloads' for fencing device changes
4c8e44
-     *
4c8e44
-     * We use the resource id + agent + digest so that we can detect
4c8e44
-     * changes to the agent and/or the parameters used
4c8e44
-     */
4c8e44
-    char *search_all = crm_strdup_printf("%s:%s:%s", rsc->id, (const char*)g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), data->digest_all_calc);
4c8e44
-    char *search_secure = crm_strdup_printf("%s:%s:%s", rsc->id, (const char*)g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), data->digest_secure_calc);
4c8e44
+    free(key);
4c8e44
 
4c8e44
-    data->rc = RSC_DIGEST_ALL;
4c8e44
-    if (digest_all == NULL) {
4c8e44
-        /* it is unknown what the previous op digest was */
4c8e44
+    // Check whether node has special unfencing summary node attribute
4c8e44
+    node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
4c8e44
+    if (node_summary == NULL) {
4c8e44
         data->rc = RSC_DIGEST_UNKNOWN;
4c8e44
+        return data;
4c8e44
+    }
4c8e44
 
4c8e44
-    } else if (strstr(digest_all, search_all)) {
4c8e44
+    // Check whether full parameter digest matches
4c8e44
+    if (unfencing_digest_matches(rsc->id, agent, data->digest_all_calc,
4c8e44
+                                 node_summary)) {
4c8e44
         data->rc = RSC_DIGEST_MATCH;
4c8e44
+        return data;
4c8e44
+    }
4c8e44
 
4c8e44
-    } else if(digest_secure && data->digest_secure_calc) {
4c8e44
-        if(strstr(digest_secure, search_secure)) {
4c8e44
-            if (is_set(data_set->flags, pe_flag_stdout)) {
4c8e44
-                printf("Only 'private' parameters to %s for unfencing %s changed\n",
4c8e44
-                       rsc->id, node->details->uname);
4c8e44
-            }
4c8e44
-            data->rc = RSC_DIGEST_MATCH;
4c8e44
+    // Check whether secure parameter digest matches
4c8e44
+    node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE);
4c8e44
+    if (unfencing_digest_matches(rsc->id, agent, data->digest_secure_calc,
4c8e44
+                                 node_summary)) {
4c8e44
+        data->rc = RSC_DIGEST_MATCH;
4c8e44
+        if (is_set(data_set->flags, pe_flag_stdout)) {
4c8e44
+            printf("Only 'private' parameters to %s for unfencing %s changed\n",
4c8e44
+                   rsc->id, node->details->uname);
4c8e44
         }
4c8e44
+        return data;
4c8e44
     }
4c8e44
 
4c8e44
-    if (is_set(data_set->flags, pe_flag_sanitized)
4c8e44
-        && is_set(data_set->flags, pe_flag_stdout)
4c8e44
-        && (data->rc == RSC_DIGEST_ALL)
4c8e44
+    // Parameters don't match
4c8e44
+    data->rc = RSC_DIGEST_ALL;
4c8e44
+    if (is_set(data_set->flags, (pe_flag_sanitized|pe_flag_stdout))
4c8e44
         && data->digest_secure_calc) {
4c8e44
-        printf("Parameters to %s for unfencing %s changed, try '%s:%s:%s'\n",
4c8e44
-               rsc->id, node->details->uname, rsc->id,
4c8e44
-               (const char *) g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE),
4c8e44
-               data->digest_secure_calc);
4c8e44
-    }
4c8e44
-
4c8e44
-    free(key);
4c8e44
-    free(search_all);
4c8e44
-    free(search_secure);
4c8e44
+        char *digest = create_unfencing_summary(rsc->id, agent,
4c8e44
+                                                data->digest_secure_calc);
4c8e44
 
4c8e44
+        printf("Parameters to %s for unfencing %s changed, try '%s'\n",
4c8e44
+               rsc->id, node->details->uname, digest);
4c8e44
+        free(digest);
4c8e44
+    }
4c8e44
     return data;
4c8e44
 }
4c8e44
 
4c8e44
@@ -2218,9 +2295,6 @@ pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe
4c8e44
              *
4c8e44
              * We may do this for all nodes in the future, but for now
4c8e44
              * the check_action_definition() based stuff works fine.
4c8e44
-             *
4c8e44
-             * Use "stonith-on" to avoid creating cache entries for
4c8e44
-             * operations check_action_definition() would look for.
4c8e44
              */
4c8e44
             long max = 1024;
4c8e44
             long digests_all_offset = 0;
4c8e44
@@ -2232,8 +2306,11 @@ pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe
4c8e44
 
4c8e44
             for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) {
4c8e44
                 resource_t *match = gIter->data;
4c8e44
-                op_digest_cache_t *data = fencing_action_digest_cmp(match, node, data_set);
4c8e44
+                const char *agent = g_hash_table_lookup(match->meta,
4c8e44
+                                                        XML_ATTR_TYPE);
4c8e44
+                op_digest_cache_t *data = NULL;
4c8e44
 
4c8e44
+                data = fencing_action_digest_cmp(match, agent, node, data_set);
4c8e44
                 if(data->rc == RSC_DIGEST_ALL) {
4c8e44
                     optional = FALSE;
4c8e44
                     crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id);
4c8e44
@@ -2244,11 +2321,11 @@ pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe
4c8e44
 
4c8e44
                 digests_all_offset += snprintf(
4c8e44
                     digests_all+digests_all_offset, max-digests_all_offset,
4c8e44
-                    "%s:%s:%s,", match->id, (const char*)g_hash_table_lookup(match->meta, XML_ATTR_TYPE), data->digest_all_calc);
4c8e44
+                    "%s:%s:%s,", match->id, agent, data->digest_all_calc);
4c8e44
 
4c8e44
                 digests_secure_offset += snprintf(
4c8e44
                     digests_secure+digests_secure_offset, max-digests_secure_offset,
4c8e44
-                    "%s:%s:%s,", match->id, (const char*)g_hash_table_lookup(match->meta, XML_ATTR_TYPE), data->digest_secure_calc);
4c8e44
+                    "%s:%s:%s,", match->id, agent, data->digest_secure_calc);
4c8e44
             }
4c8e44
             g_hash_table_insert(stonith_op->meta,
4c8e44
                                 strdup(XML_OP_ATTR_DIGESTS_ALL),
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From fd6e06ff419c95f4423202163d2d4dca3f03a4c5 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Fri, 10 May 2019 11:57:31 -0500
4c8e44
Subject: [PATCH 02/13] Fix: libpe_status: calculate secure digests for
4c8e44
 unfencing ops
4c8e44
4c8e44
The calculation of digests for detection of when unfencing is needed reused
4c8e44
rsc_action_digest(). However that would only add secure digests when the
4c8e44
pe_flag_sanitized flag was set, which is only set by crm_simulate, so secure
4c8e44
digests would never be added in normal cluster operation. This led to
4c8e44
node attributes like name="#digests-secure"
4c8e44
value="stonith-fence_compute-fence-nova:fence_compute:(null),".
4c8e44
4c8e44
Now, rsc_action_digest() takes a new argument to select whether secure digests
4c8e44
are added, which is always set to TRUE when calculating unfencing digests.
4c8e44
---
4c8e44
 lib/pengine/utils.c | 27 ++++++++++++++++++++++-----
4c8e44
 1 file changed, 22 insertions(+), 5 deletions(-)
4c8e44
4c8e44
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
4c8e44
index f80f8d4..5b893f7 100644
4c8e44
--- a/lib/pengine/utils.c
4c8e44
+++ b/lib/pengine/utils.c
4c8e44
@@ -1936,9 +1936,24 @@ append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNo
4c8e44
 }
4c8e44
 #endif
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Calculate action digests and store in node's digest cache
4c8e44
+ *
4c8e44
+ * \param[in] rsc          Resource that action was for
4c8e44
+ * \param[in] task         Name of action performed
4c8e44
+ * \param[in] key          Action's task key
4c8e44
+ * \param[in] node         Node action was performed on
4c8e44
+ * \param[in] xml_op       XML of operation in CIB status (if available)
4c8e44
+ * \param[in] calc_secure  Whether to calculate secure digest
4c8e44
+ * \param[in] data_set     Cluster working set
4c8e44
+ *
4c8e44
+ * \return Pointer to node's digest cache entry
4c8e44
+ */
4c8e44
 static op_digest_cache_t *
4c8e44
-rsc_action_digest(resource_t * rsc, const char *task, const char *key,
4c8e44
-                  node_t * node, xmlNode * xml_op, pe_working_set_t * data_set) 
4c8e44
+rsc_action_digest(pe_resource_t *rsc, const char *task, const char *key,
4c8e44
+                  pe_node_t *node, xmlNode *xml_op, bool calc_secure,
4c8e44
+                  pe_working_set_t *data_set)
4c8e44
 {
4c8e44
     op_digest_cache_t *data = NULL;
4c8e44
 
4c8e44
@@ -2007,7 +2022,7 @@ rsc_action_digest(resource_t * rsc, const char *task, const char *key,
4c8e44
 
4c8e44
         data->digest_all_calc = calculate_operation_digest(data->params_all, op_version);
4c8e44
 
4c8e44
-        if (is_set(data_set->flags, pe_flag_sanitized)) {
4c8e44
+        if (calc_secure) {
4c8e44
             data->params_secure = copy_xml(data->params_all);
4c8e44
             if(secure_list) {
4c8e44
                 filter_parameters(data->params_secure, secure_list, FALSE);
4c8e44
@@ -2053,7 +2068,9 @@ rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
4c8e44
 
4c8e44
     interval_ms = crm_parse_ms(interval_ms_s);
4c8e44
     key = generate_op_key(rsc->id, task, interval_ms);
4c8e44
-    data = rsc_action_digest(rsc, task, key, node, xml_op, data_set);
4c8e44
+    data = rsc_action_digest(rsc, task, key, node, xml_op,
4c8e44
+                             is_set(data_set->flags, pe_flag_sanitized),
4c8e44
+                             data_set);
4c8e44
 
4c8e44
     data->rc = RSC_DIGEST_MATCH;
4c8e44
     if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) {
4c8e44
@@ -2167,7 +2184,7 @@ fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent,
4c8e44
     // Calculate device's current parameter digests
4c8e44
     char *key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0);
4c8e44
     op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key,
4c8e44
-                                                node, NULL, data_set);
4c8e44
+                                                node, NULL, TRUE, data_set);
4c8e44
 
4c8e44
     free(key);
4c8e44
 
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From 7886c8ec4dd209078cdc76274ed9d2804ea09b6a Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 5 Jun 2019 12:54:34 -0500
4c8e44
Subject: [PATCH 03/13] Refactor: controller: pass desired op status when
4c8e44
 synthesizing failure
4c8e44
4c8e44
so we can use new status codes later
4c8e44
---
4c8e44
 daemons/controld/controld_execd.c | 27 +++++++++++++++++++++------
4c8e44
 1 file changed, 21 insertions(+), 6 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
4c8e44
index 8e89216..fed9419 100644
4c8e44
--- a/daemons/controld/controld_execd.c
4c8e44
+++ b/daemons/controld/controld_execd.c
4c8e44
@@ -1424,8 +1424,22 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
4c8e44
     update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
4c8e44
 }
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Fail a requested action without actually executing it
4c8e44
+ *
4c8e44
+ * For an action that can't be executed, process it similarly to an actual
4c8e44
+ * execution result, with specified error status (except for notify actions,
4c8e44
+ * which will always be treated as successful).
4c8e44
+ *
4c8e44
+ * \param[in] lrm_state  Executor connection that action is for
4c8e44
+ * \param[in] action     Action XML from request
4c8e44
+ * \param[in] op_status  Desired operation status to use
4c8e44
+ * \param[in] rc         Desired return code to use
4c8e44
+ */
4c8e44
 static void
4c8e44
-synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) 
4c8e44
+synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action,
4c8e44
+                        int op_status, enum ocf_exitcode rc)
4c8e44
 {
4c8e44
     lrmd_event_data_t *op = NULL;
4c8e44
     const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
4c8e44
@@ -1451,7 +1465,7 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc)
4c8e44
     if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail
4c8e44
         fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK);
4c8e44
     } else {
4c8e44
-        fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc);
4c8e44
+        fake_op_status(lrm_state, op, op_status, rc);
4c8e44
     }
4c8e44
 
4c8e44
     crm_info("Faking " CRM_OP_FMT " result (%d) on %s",
4c8e44
@@ -1744,7 +1758,8 @@ do_lrm_invoke(long long action,
4c8e44
     if ((lrm_state == NULL) && is_remote_node) {
4c8e44
         crm_err("Failing action because local node has never had connection to remote node %s",
4c8e44
                 target_node);
4c8e44
-        synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
4c8e44
+        synthesize_lrmd_failure(NULL, input->xml, PCMK_LRM_OP_ERROR,
4c8e44
+                                PCMK_OCF_CONNECTION_DIED);
4c8e44
         return;
4c8e44
     }
4c8e44
     CRM_ASSERT(lrm_state != NULL);
4c8e44
@@ -1800,7 +1815,7 @@ do_lrm_invoke(long long action,
4c8e44
 
4c8e44
         rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
4c8e44
         if (rc == -ENOTCONN) {
4c8e44
-            synthesize_lrmd_failure(lrm_state, input->xml,
4c8e44
+            synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
4c8e44
                                     PCMK_OCF_CONNECTION_DIED);
4c8e44
             return;
4c8e44
 
4c8e44
@@ -1822,7 +1837,7 @@ do_lrm_invoke(long long action,
4c8e44
             // Resource operation on malformed resource
4c8e44
             crm_err("Invalid resource definition for %s", ID(xml_rsc));
4c8e44
             crm_log_xml_warn(input->msg, "invalid resource");
4c8e44
-            synthesize_lrmd_failure(lrm_state, input->xml,
4c8e44
+            synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
4c8e44
                                     PCMK_OCF_NOT_CONFIGURED); // fatal error
4c8e44
             return;
4c8e44
 
4c8e44
@@ -1832,7 +1847,7 @@ do_lrm_invoke(long long action,
4c8e44
                     CRM_XS " rc=%d",
4c8e44
                     ID(xml_rsc), pcmk_strerror(rc), rc);
4c8e44
             crm_log_xml_warn(input->msg, "failed registration");
4c8e44
-            synthesize_lrmd_failure(lrm_state, input->xml,
4c8e44
+            synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
4c8e44
                                     PCMK_OCF_INVALID_PARAM); // hard error
4c8e44
             return;
4c8e44
         }
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From ddc3942d7131db9c9874031ca4b3b4a531221573 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 5 Jun 2019 13:08:15 -0500
4c8e44
Subject: [PATCH 04/13] Fix: controller: use op status, not rc, for executor
4c8e44
 disconnection
4c8e44
4c8e44
Previously, if an action were requested for an executor (local or remote) that
4c8e44
the controller does not have a connection to, the action's rc would be set to
4c8e44
PCMK_OCF_CONNECTION_DIED and its op status to PCMK_LRM_OP_ERROR.
4c8e44
4c8e44
This was undesirable for a couple of reasons: PCMK_OCF_CONNECTION_DIED is a
4c8e44
nonstandard extension to the OCF return codes, which can confuse users
4c8e44
trying to look up the meaning or interpret cluster status output; and it really
4c8e44
is an operation execution status and not an operation result.
4c8e44
4c8e44
This changes the result to PCMK_OCF_UNKNOWN_ERROR with a new op status
4c8e44
PCMK_LRM_OP_NOT_CONNECTED. The new codes are mapped to the old ones for older
4c8e44
DCs that don't understand them.
4c8e44
---
4c8e44
 cts/CTStests.py                         |  2 +-
4c8e44
 daemons/controld/controld_execd.c       | 21 +++++++++++++++++----
4c8e44
 daemons/controld/controld_execd_state.c |  6 ++++--
4c8e44
 include/crm/services.h                  |  4 +++-
4c8e44
 lib/common/operations.c                 |  1 +
4c8e44
 lib/pengine/unpack.c                    |  3 ++-
4c8e44
 6 files changed, 28 insertions(+), 9 deletions(-)
4c8e44
4c8e44
diff --git a/cts/CTStests.py b/cts/CTStests.py
4c8e44
index 32945cb..be7fd7f 100644
4c8e44
--- a/cts/CTStests.py
4c8e44
+++ b/cts/CTStests.py
4c8e44
@@ -3068,7 +3068,7 @@ class RemoteStonithd(RemoteDriver):
4c8e44
             r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
4c8e44
             r"Calculated [Tt]ransition .*pe-error",
4c8e44
             r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery",
4c8e44
-            r"error: Result of monitor operation for .* on remote-.*: Error",
4c8e44
+            r"error: Result of monitor operation for .* on remote-.*: No executor connection",
4c8e44
         ]
4c8e44
 
4c8e44
         ignore_pats.extend(RemoteDriver.errorstoignore(self))
4c8e44
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
4c8e44
index fed9419..ac215b6 100644
4c8e44
--- a/daemons/controld/controld_execd.c
4c8e44
+++ b/daemons/controld/controld_execd.c
4c8e44
@@ -1758,8 +1758,8 @@ do_lrm_invoke(long long action,
4c8e44
     if ((lrm_state == NULL) && is_remote_node) {
4c8e44
         crm_err("Failing action because local node has never had connection to remote node %s",
4c8e44
                 target_node);
4c8e44
-        synthesize_lrmd_failure(NULL, input->xml, PCMK_LRM_OP_ERROR,
4c8e44
-                                PCMK_OCF_CONNECTION_DIED);
4c8e44
+        synthesize_lrmd_failure(NULL, input->xml, PCMK_LRM_OP_NOT_CONNECTED,
4c8e44
+                                PCMK_OCF_UNKNOWN_ERROR);
4c8e44
         return;
4c8e44
     }
4c8e44
     CRM_ASSERT(lrm_state != NULL);
4c8e44
@@ -1815,8 +1815,9 @@ do_lrm_invoke(long long action,
4c8e44
 
4c8e44
         rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
4c8e44
         if (rc == -ENOTCONN) {
4c8e44
-            synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
4c8e44
-                                    PCMK_OCF_CONNECTION_DIED);
4c8e44
+            synthesize_lrmd_failure(lrm_state, input->xml,
4c8e44
+                                    PCMK_LRM_OP_NOT_CONNECTED,
4c8e44
+                                    PCMK_OCF_UNKNOWN_ERROR);
4c8e44
             return;
4c8e44
 
4c8e44
         } else if ((rc < 0) && !create_rsc) {
4c8e44
@@ -2532,6 +2533,18 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
4c8e44
     CRM_CHECK(op != NULL, return);
4c8e44
     CRM_CHECK(op->rsc_id != NULL, return);
4c8e44
 
4c8e44
+    // Remap new status codes for older DCs
4c8e44
+    if (compare_version(fsa_our_dc_version, "3.2.0") < 0) {
4c8e44
+        switch (op->op_status) {
4c8e44
+            case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
+                op->op_status = PCMK_LRM_OP_ERROR;
4c8e44
+                op->rc = PCMK_OCF_CONNECTION_DIED;
4c8e44
+                break;
4c8e44
+            default:
4c8e44
+                break;
4c8e44
+        }
4c8e44
+    }
4c8e44
+
4c8e44
     op_id = make_stop_id(op->rsc_id, op->call_id);
4c8e44
     op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
4c8e44
 
4c8e44
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
4c8e44
index 4e9f096..63e6b33 100644
4c8e44
--- a/daemons/controld/controld_execd_state.c
4c8e44
+++ b/daemons/controld/controld_execd_state.c
4c8e44
@@ -1,6 +1,8 @@
4c8e44
 /*
4c8e44
  * Copyright 2012-2019 the Pacemaker project contributors
4c8e44
  *
4c8e44
+ * The version control history for this file may have further details.
4c8e44
+ *
4c8e44
  * This source code is licensed under the GNU General Public License version 2
4c8e44
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
4c8e44
  */
4c8e44
@@ -76,8 +78,8 @@ fail_pending_op(gpointer key, gpointer value, gpointer user_data)
4c8e44
     event.user_data = op->user_data;
4c8e44
     event.timeout = 0;
4c8e44
     event.interval_ms = op->interval_ms;
4c8e44
-    event.rc = PCMK_OCF_CONNECTION_DIED;
4c8e44
-    event.op_status = PCMK_LRM_OP_ERROR;
4c8e44
+    event.rc = PCMK_OCF_UNKNOWN_ERROR;
4c8e44
+    event.op_status = PCMK_LRM_OP_NOT_CONNECTED;
4c8e44
     event.t_run = op->start_time;
4c8e44
     event.t_rcchange = op->start_time;
4c8e44
 
4c8e44
diff --git a/include/crm/services.h b/include/crm/services.h
4c8e44
index 4bdd21a..ca9470b 100644
4c8e44
--- a/include/crm/services.h
4c8e44
+++ b/include/crm/services.h
4c8e44
@@ -100,7 +100,7 @@ enum ocf_exitcode {
4c8e44
 
4c8e44
 
4c8e44
     /* 150-199	reserved for application use */
4c8e44
-    PCMK_OCF_CONNECTION_DIED = 189, /* Operation failure implied by disconnection of the LRM API to a local or remote node */
4c8e44
+    PCMK_OCF_CONNECTION_DIED = 189, // Deprecated (see PCMK_LRM_OP_NOT_CONNECTED)
4c8e44
 
4c8e44
     PCMK_OCF_DEGRADED        = 190, /* Active resource that is no longer 100% functional */
4c8e44
     PCMK_OCF_DEGRADED_MASTER = 191, /* Promoted resource that is no longer 100% functional */
4c8e44
@@ -126,6 +126,7 @@ enum op_status {
4c8e44
     PCMK_LRM_OP_ERROR_HARD,
4c8e44
     PCMK_LRM_OP_ERROR_FATAL,
4c8e44
     PCMK_LRM_OP_NOT_INSTALLED,
4c8e44
+    PCMK_LRM_OP_NOT_CONNECTED,
4c8e44
 };
4c8e44
 
4c8e44
 enum nagios_exitcode {
4c8e44
@@ -337,6 +338,7 @@ gboolean services_alert_async(svc_action_t *action,
4c8e44
                 case PCMK_LRM_OP_NOTSUPPORTED:return "NOT SUPPORTED";
4c8e44
                 case PCMK_LRM_OP_ERROR:return "Error";
4c8e44
                 case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed";
4c8e44
+                case PCMK_LRM_OP_NOT_CONNECTED:return "No executor connection";
4c8e44
                 default:return "UNKNOWN!";
4c8e44
         }
4c8e44
     }
4c8e44
diff --git a/lib/common/operations.c b/lib/common/operations.c
4c8e44
index 2144cc6..c6b16cb 100644
4c8e44
--- a/lib/common/operations.c
4c8e44
+++ b/lib/common/operations.c
4c8e44
@@ -395,6 +395,7 @@ did_rsc_op_fail(lrmd_event_data_t * op, int target_rc)
4c8e44
         case PCMK_LRM_OP_NOTSUPPORTED:
4c8e44
         case PCMK_LRM_OP_TIMEOUT:
4c8e44
         case PCMK_LRM_OP_ERROR:
4c8e44
+        case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
             return TRUE;
4c8e44
             break;
4c8e44
 
4c8e44
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
4c8e44
index 0e8177b..671f0c4 100644
4c8e44
--- a/lib/pengine/unpack.c
4c8e44
+++ b/lib/pengine/unpack.c
4c8e44
@@ -3163,7 +3163,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last
4c8e44
     crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
4c8e44
 
4c8e44
     CRM_CHECK(task != NULL, return FALSE);
4c8e44
-    CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
4c8e44
+    CRM_CHECK(status <= PCMK_LRM_OP_NOT_CONNECTED, return FALSE);
4c8e44
     CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
4c8e44
 
4c8e44
     if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
4c8e44
@@ -3304,6 +3304,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last
4c8e44
         case PCMK_LRM_OP_ERROR_FATAL:
4c8e44
         case PCMK_LRM_OP_TIMEOUT:
4c8e44
         case PCMK_LRM_OP_NOTSUPPORTED:
4c8e44
+        case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
 
4c8e44
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
4c8e44
             if ((failure_strategy == action_fail_ignore)
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From fc135cb441fb7c66a44fbffe74dcae26c112be3f Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 5 Jun 2019 13:43:08 -0500
4c8e44
Subject: [PATCH 05/13] Fix: controller: use op status, not rc, for execution
4c8e44
 in invalid state
4c8e44
4c8e44
Previously, if an action were requested while the controller cannot execute actions
4c8e44
(e.g. during shutdown), the action's rc would be set to CRM_DIRECT_NACK_RC and its op
4c8e44
status to PCMK_LRM_OP_ERROR.
4c8e44
4c8e44
This was undesirable for a couple of reasons: rc should only be OCF return codes,
4c8e44
and it really is an operation execution status and not an operation result.
4c8e44
4c8e44
This changes the result to PCMK_OCF_UNKNOWN_ERROR with a new op status
4c8e44
PCMK_LRM_OP_INVALID. The new codes are mapped to the old ones for older
4c8e44
DCs that don't understand them.
4c8e44
---
4c8e44
 daemons/controld/controld_execd.c     |  8 ++++++--
4c8e44
 daemons/controld/controld_fsa.h       |  6 +-----
4c8e44
 daemons/controld/controld_te_events.c | 13 ++++++-------
4c8e44
 include/crm/services.h                |  2 ++
4c8e44
 lib/common/operations.c               |  1 +
4c8e44
 lib/pengine/unpack.c                  |  3 ++-
4c8e44
 6 files changed, 18 insertions(+), 15 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
4c8e44
index ac215b6..a20f96a 100644
4c8e44
--- a/daemons/controld/controld_execd.c
4c8e44
+++ b/daemons/controld/controld_execd.c
4c8e44
@@ -2254,8 +2254,8 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat
4c8e44
                    operation, rsc->id, fsa_state2string(fsa_state),
4c8e44
                    is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");
4c8e44
 
4c8e44
-        op->rc = CRM_DIRECT_NACK_RC;
4c8e44
-        op->op_status = PCMK_LRM_OP_ERROR;
4c8e44
+        op->rc = PCMK_OCF_UNKNOWN_ERROR;
4c8e44
+        op->op_status = PCMK_LRM_OP_INVALID;
4c8e44
         send_direct_ack(NULL, NULL, rsc, op, rsc->id);
4c8e44
         lrmd_free_event(op);
4c8e44
         free(op_id);
4c8e44
@@ -2540,6 +2540,10 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
4c8e44
                 op->op_status = PCMK_LRM_OP_ERROR;
4c8e44
                 op->rc = PCMK_OCF_CONNECTION_DIED;
4c8e44
                 break;
4c8e44
+            case PCMK_LRM_OP_INVALID:
4c8e44
+                op->op_status = PCMK_LRM_OP_ERROR;
4c8e44
+                op->rc = CRM_DIRECT_NACK_RC;
4c8e44
+                break;
4c8e44
             default:
4c8e44
                 break;
4c8e44
         }
4c8e44
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
4c8e44
index 397a9cd..7527ed9 100644
4c8e44
--- a/daemons/controld/controld_fsa.h
4c8e44
+++ b/daemons/controld/controld_fsa.h
4c8e44
@@ -426,11 +426,7 @@ enum crmd_fsa_input {
4c8e44
 
4c8e44
 #  define R_IN_RECOVERY     0x80000000ULL
4c8e44
 
4c8e44
-/*
4c8e44
- * Magic RC used within the controller to indicate direct nacks
4c8e44
- * (operation is invalid in current state)
4c8e44
- */
4c8e44
-#define CRM_DIRECT_NACK_RC (99)
4c8e44
+#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_LRM_OP_INVALID)
4c8e44
 
4c8e44
 enum crmd_fsa_cause {
4c8e44
     C_UNKNOWN = 0,
4c8e44
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
4c8e44
index b7b48a4..d297241 100644
4c8e44
--- a/daemons/controld/controld_te_events.c
4c8e44
+++ b/daemons/controld/controld_te_events.c
4c8e44
@@ -123,10 +123,8 @@ update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
4c8e44
     const char *on_uname = crm_peer_uname(event_node_uuid);
4c8e44
     const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
4c8e44
 
4c8e44
-    /* Nothing needs to be done for success, lrm status refresh,
4c8e44
-     * or direct nack (internal code for "busy, try again")
4c8e44
-     */
4c8e44
-    if ((rc == CRM_DIRECT_NACK_RC) || (rc == target_rc)) {
4c8e44
+    // Nothing needs to be done for success or status refresh
4c8e44
+    if (rc == target_rc) {
4c8e44
         return FALSE;
4c8e44
     } else if (safe_str_eq(origin, "build_active_RAs")) {
4c8e44
         crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
4c8e44
@@ -225,7 +223,7 @@ status_from_rc(crm_action_t * action, int orig_status, int rc, int target_rc)
4c8e44
         return PCMK_LRM_OP_DONE;
4c8e44
     }
4c8e44
 
4c8e44
-    if (rc != CRM_DIRECT_NACK_RC) {
4c8e44
+    if (orig_status != PCMK_LRM_OP_INVALID) {
4c8e44
         const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
4c8e44
         const char *uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
4c8e44
 
4c8e44
@@ -541,8 +539,9 @@ process_graph_event(xmlNode *event, const char *event_node)
4c8e44
     if (action && (rc == target_rc)) {
4c8e44
         crm_trace("Processed update to %s: %s", id, magic);
4c8e44
     } else {
4c8e44
-        if (update_failcount(event, event_node, rc, target_rc,
4c8e44
-                             (transition_num == -1), ignore_failures)) {
4c8e44
+        if ((status != PCMK_LRM_OP_INVALID)
4c8e44
+            && update_failcount(event, event_node, rc, target_rc,
4c8e44
+                               (transition_num == -1), ignore_failures)) {
4c8e44
             desc = "failed";
4c8e44
         }
4c8e44
         crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num,
4c8e44
diff --git a/include/crm/services.h b/include/crm/services.h
4c8e44
index ca9470b..0771241 100644
4c8e44
--- a/include/crm/services.h
4c8e44
+++ b/include/crm/services.h
4c8e44
@@ -127,6 +127,7 @@ enum op_status {
4c8e44
     PCMK_LRM_OP_ERROR_FATAL,
4c8e44
     PCMK_LRM_OP_NOT_INSTALLED,
4c8e44
     PCMK_LRM_OP_NOT_CONNECTED,
4c8e44
+    PCMK_LRM_OP_INVALID,
4c8e44
 };
4c8e44
 
4c8e44
 enum nagios_exitcode {
4c8e44
@@ -339,6 +340,7 @@ gboolean services_alert_async(svc_action_t *action,
4c8e44
                 case PCMK_LRM_OP_ERROR:return "Error";
4c8e44
                 case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed";
4c8e44
                 case PCMK_LRM_OP_NOT_CONNECTED:return "No executor connection";
4c8e44
+                case PCMK_LRM_OP_INVALID:return "Cannot execute now";
4c8e44
                 default:return "UNKNOWN!";
4c8e44
         }
4c8e44
     }
4c8e44
diff --git a/lib/common/operations.c b/lib/common/operations.c
4c8e44
index c6b16cb..480bddc 100644
4c8e44
--- a/lib/common/operations.c
4c8e44
+++ b/lib/common/operations.c
4c8e44
@@ -396,6 +396,7 @@ did_rsc_op_fail(lrmd_event_data_t * op, int target_rc)
4c8e44
         case PCMK_LRM_OP_TIMEOUT:
4c8e44
         case PCMK_LRM_OP_ERROR:
4c8e44
         case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
+        case PCMK_LRM_OP_INVALID:
4c8e44
             return TRUE;
4c8e44
             break;
4c8e44
 
4c8e44
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
4c8e44
index 671f0c4..fb1ab60 100644
4c8e44
--- a/lib/pengine/unpack.c
4c8e44
+++ b/lib/pengine/unpack.c
4c8e44
@@ -3163,7 +3163,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last
4c8e44
     crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
4c8e44
 
4c8e44
     CRM_CHECK(task != NULL, return FALSE);
4c8e44
-    CRM_CHECK(status <= PCMK_LRM_OP_NOT_CONNECTED, return FALSE);
4c8e44
+    CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return FALSE);
4c8e44
     CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
4c8e44
 
4c8e44
     if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
4c8e44
@@ -3305,6 +3305,7 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last
4c8e44
         case PCMK_LRM_OP_TIMEOUT:
4c8e44
         case PCMK_LRM_OP_NOTSUPPORTED:
4c8e44
         case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
+        case PCMK_LRM_OP_INVALID:
4c8e44
 
4c8e44
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
4c8e44
             if ((failure_strategy == action_fail_ignore)
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From f5ea526b211e95ece16acb0f72bfbbbda60ec437 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 12 Jun 2019 20:48:59 -0500
4c8e44
Subject: [PATCH 06/13] Doc: libcrmcommon: document CRM_FEATURE_SET in API docs
4c8e44
4c8e44
---
4c8e44
 include/crm/crm.h | 23 ++++++++++++++++++++++-
4c8e44
 1 file changed, 22 insertions(+), 1 deletion(-)
4c8e44
4c8e44
diff --git a/include/crm/crm.h b/include/crm/crm.h
4c8e44
index 5f323e8..56a2048 100644
4c8e44
--- a/include/crm/crm.h
4c8e44
+++ b/include/crm/crm.h
4c8e44
@@ -1,5 +1,5 @@
4c8e44
 /*
4c8e44
- * Copyright 2004-2018 the Pacemaker project contributors
4c8e44
+ * Copyright 2004-2019 the Pacemaker project contributors
4c8e44
  *
4c8e44
  * The version control history for this file may have further details.
4c8e44
  *
4c8e44
@@ -29,6 +29,27 @@ extern "C" {
4c8e44
 
4c8e44
 #  include <libxml/tree.h>
4c8e44
 
4c8e44
+/*!
4c8e44
+ * The CRM feature set assists with compatibility in mixed-version clusters.
4c8e44
+ * The major version number increases when nodes with different versions
4c8e44
+ * would not work (rolling upgrades are not allowed). The minor version
4c8e44
+ * number increases when mixed-version clusters are allowed only during
4c8e44
+ * rolling upgrades (a node with the oldest feature set will be elected DC). The
4c8e44
+ * minor-minor version number is ignored, but allows resource agents to detect
4c8e44
+ * cluster support for various features.
4c8e44
+ *
4c8e44
+ * The feature set also affects the processing of old saved CIBs (such as for
4c8e44
+ * many scheduler regression tests).
4c8e44
+ *
4c8e44
+ * Particular feature points currently used by pacemaker:
4c8e44
+ *
4c8e44
+ * >2.1:     Operation updates include timing data
4c8e44
+ * >=3.0.5:  XML v2 digests are created
4c8e44
+ * >=3.0.8:  Peers do not need acks for cancellations
4c8e44
+ * >=3.0.9:  DC will send its own shutdown request to all peers
4c8e44
+ *           XML v2 patchsets are created by default
4c8e44
+ * >=3.0.13: Fail counts include operation name and interval
4c8e44
+ */
4c8e44
 #  define CRM_FEATURE_SET		"3.1.0"
4c8e44
 
4c8e44
 #  define EOS		'\0'
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From 1ff54a448b1178a34f2dd4f615221087e08468de Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 12 Jun 2019 20:51:21 -0500
4c8e44
Subject: [PATCH 07/13] Feature: libcrmcommon: bump CRM feature set
4c8e44
4c8e44
... for the new LRM op status codes
4c8e44
---
4c8e44
 include/crm/crm.h | 3 ++-
4c8e44
 1 file changed, 2 insertions(+), 1 deletion(-)
4c8e44
4c8e44
diff --git a/include/crm/crm.h b/include/crm/crm.h
4c8e44
index 56a2048..cbf72d3 100644
4c8e44
--- a/include/crm/crm.h
4c8e44
+++ b/include/crm/crm.h
4c8e44
@@ -49,8 +49,9 @@ extern "C" {
4c8e44
  * >=3.0.9:  DC will send its own shutdown request to all peers
4c8e44
  *           XML v2 patchsets are created by default
4c8e44
  * >=3.0.13: Fail counts include operation name and interval
4c8e44
+ * >=3.2.0:  DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
4c8e44
  */
4c8e44
-#  define CRM_FEATURE_SET		"3.1.0"
4c8e44
+#  define CRM_FEATURE_SET		"3.2.0"
4c8e44
 
4c8e44
 #  define EOS		'\0'
4c8e44
 #  define DIMOF(a)	((int) (sizeof(a)/sizeof(a[0])) )
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From efc639cc835fba27fa5af4a0539e995d95660520 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 5 Jun 2019 15:12:20 -0500
4c8e44
Subject: [PATCH 08/13] Low: libpe_status: fail connection resource if remote
4c8e44
 action gets "not connected"
4c8e44
4c8e44
---
4c8e44
 lib/pengine/unpack.c | 15 ++++++++++++++-
4c8e44
 1 file changed, 14 insertions(+), 1 deletion(-)
4c8e44
4c8e44
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
4c8e44
index fb1ab60..081df07 100644
4c8e44
--- a/lib/pengine/unpack.c
4c8e44
+++ b/lib/pengine/unpack.c
4c8e44
@@ -3299,12 +3299,25 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last
4c8e44
             unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
4c8e44
             break;
4c8e44
 
4c8e44
+        case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
+            if (pe__is_guest_or_remote_node(node)
4c8e44
+                && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) {
4c8e44
+                /* We should never get into a situation where a managed remote
4c8e44
+                 * connection resource is considered OK but a resource action
4c8e44
+                 * behind the connection gets a "not connected" status. But as a
4c8e44
+                 * fail-safe in case a bug or unusual circumstances do lead to
4c8e44
+                 * that, ensure the remote connection is considered failed.
4c8e44
+                 */
4c8e44
+                set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
4c8e44
+            }
4c8e44
+
4c8e44
+            // fall through
4c8e44
+
4c8e44
         case PCMK_LRM_OP_ERROR:
4c8e44
         case PCMK_LRM_OP_ERROR_HARD:
4c8e44
         case PCMK_LRM_OP_ERROR_FATAL:
4c8e44
         case PCMK_LRM_OP_TIMEOUT:
4c8e44
         case PCMK_LRM_OP_NOTSUPPORTED:
4c8e44
-        case PCMK_LRM_OP_NOT_CONNECTED:
4c8e44
         case PCMK_LRM_OP_INVALID:
4c8e44
 
4c8e44
             failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From dad337a96dfeca4dbde7bbd97f99f24956440fc2 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Sat, 8 Jun 2019 16:25:04 -0500
4c8e44
Subject: [PATCH 09/13] Refactor: libpe_status: add function for checking
4c8e44
 shutdown attribute
4c8e44
4c8e44
... to reduce code duplication and allow further reuse
4c8e44
---
4c8e44
 include/crm/pengine/internal.h |  2 ++
4c8e44
 lib/pengine/unpack.c           |  8 ++------
4c8e44
 lib/pengine/utils.c            | 20 ++++++++++++++++++++
4c8e44
 3 files changed, 24 insertions(+), 6 deletions(-)
4c8e44
4c8e44
diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
4c8e44
index fd55bb9..a2a3d52 100644
4c8e44
--- a/include/crm/pengine/internal.h
4c8e44
+++ b/include/crm/pengine/internal.h
4c8e44
@@ -359,4 +359,6 @@ void pe__foreach_param_check(pe_working_set_t *data_set,
4c8e44
                                         enum pe_check_parameters,
4c8e44
                                         pe_working_set_t*));
4c8e44
 void pe__free_param_checks(pe_working_set_t *data_set);
4c8e44
+
4c8e44
+bool pe__shutdown_requested(pe_node_t *node);
4c8e44
 #endif
4c8e44
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
4c8e44
index 081df07..9d13a57 100644
4c8e44
--- a/lib/pengine/unpack.c
4c8e44
+++ b/lib/pengine/unpack.c
4c8e44
@@ -909,7 +909,6 @@ unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t *
4c8e44
     const char *resource_discovery_enabled = NULL;
4c8e44
     xmlNode *attrs = NULL;
4c8e44
     resource_t *rsc = NULL;
4c8e44
-    const char *shutdown = NULL;
4c8e44
 
4c8e44
     if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
4c8e44
         return;
4c8e44
@@ -931,8 +930,7 @@ unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t *
4c8e44
     attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
4c8e44
     add_node_attrs(attrs, this_node, TRUE, data_set);
4c8e44
 
4c8e44
-    shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
4c8e44
-    if (shutdown != NULL && safe_str_neq("0", shutdown)) {
4c8e44
+    if (pe__shutdown_requested(this_node)) {
4c8e44
         crm_info("Node %s is shutting down", this_node->details->uname);
4c8e44
         this_node->details->shutdown = TRUE;
4c8e44
         if (rsc) {
4c8e44
@@ -1392,7 +1390,6 @@ gboolean
4c8e44
 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
4c8e44
 {
4c8e44
     gboolean online = FALSE;
4c8e44
-    const char *shutdown = NULL;
4c8e44
     const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
4c8e44
 
4c8e44
     if (this_node == NULL) {
4c8e44
@@ -1402,9 +1399,8 @@ determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set
4c8e44
 
4c8e44
     this_node->details->shutdown = FALSE;
4c8e44
     this_node->details->expected_up = FALSE;
4c8e44
-    shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
4c8e44
 
4c8e44
-    if (shutdown != NULL && safe_str_neq("0", shutdown)) {
4c8e44
+    if (pe__shutdown_requested(this_node)) {
4c8e44
         this_node->details->shutdown = TRUE;
4c8e44
 
4c8e44
     } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
4c8e44
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
4c8e44
index 5b893f7..c5fd0f7 100644
4c8e44
--- a/lib/pengine/utils.c
4c8e44
+++ b/lib/pengine/utils.c
4c8e44
@@ -2510,3 +2510,23 @@ void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrit
4c8e44
         }
4c8e44
     }
4c8e44
 }
4c8e44
+
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Check whether shutdown has been requested for a node
4c8e44
+ *
4c8e44
+ * \param[in] node  Node to check
4c8e44
+ *
4c8e44
+ * \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise
4c8e44
+ * \note This differs from simply using node->details->shutdown in that it can
4c8e44
+ *       be used before that has been determined (and in fact to determine it),
4c8e44
+ *       and it can also be used to distinguish requested shutdown from implicit
4c8e44
+ *       shutdown of remote nodes by virtue of their connection stopping.
4c8e44
+ */
4c8e44
+bool
4c8e44
+pe__shutdown_requested(pe_node_t *node)
4c8e44
+{
4c8e44
+    const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
4c8e44
+
4c8e44
+    return shutdown && strcmp(shutdown, "0");
4c8e44
+}
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From 1e9903326a59f58d9dd2f2618d709f8aa61e41e9 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 5 Jun 2019 16:37:26 -0500
4c8e44
Subject: [PATCH 10/13] Fix: scheduler: remote state is failed if node is
4c8e44
 shutting down with connection failure
4c8e44
4c8e44
When determining remote state, if the connection resource is failed and not
4c8e44
being started again, we consider the state to be unknown if the connection has
4c8e44
a reconnect interval, because we won't know whether the connection can be
4c8e44
recovered until the interval expires and we re-attempt connection.
4c8e44
4c8e44
However, if the node is shutting down at the time, we won't re-attempt
4c8e44
connection, so consider the state failed in that case. (Note that we check the
4c8e44
actual shutdown node attribute, rather than node->details->shutdown, since that
4c8e44
is set for remote nodes whenever the connection is stopping.)
4c8e44
4c8e44
This avoids a situation where actions that cannot succeed can be scheduled on a
4c8e44
remote node that's shutting down.
4c8e44
---
4c8e44
 lib/pacemaker/pcmk_sched_allocate.c | 3 ++-
4c8e44
 1 file changed, 2 insertions(+), 1 deletion(-)
4c8e44
4c8e44
diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
4c8e44
index 3363a72..b7d1b48 100644
4c8e44
--- a/lib/pacemaker/pcmk_sched_allocate.c
4c8e44
+++ b/lib/pacemaker/pcmk_sched_allocate.c
4c8e44
@@ -1972,7 +1972,8 @@ get_remote_node_state(pe_node_t *node)
4c8e44
 
4c8e44
         if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
4c8e44
             && remote_rsc->remote_reconnect_ms
4c8e44
-            && node->details->remote_was_fenced) {
4c8e44
+            && node->details->remote_was_fenced
4c8e44
+            && !pe__shutdown_requested(node)) {
4c8e44
 
4c8e44
             /* We won't know whether the connection is recoverable until the
4c8e44
              * reconnect interval expires and we reattempt connection.
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From ea70750d04219618b5feeda04443b27616e441a0 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Wed, 5 Jun 2019 16:43:19 -0500
4c8e44
Subject: [PATCH 11/13] Fix: libpe_status: don't order implied stops relative
4c8e44
 to a remote connection
4c8e44
4c8e44
Actions behind a remote connection are ordered relative to any start or stop of
4c8e44
the remote connection. However, if the action is a stop implied due to fencing,
4c8e44
it does not require the remote connection, and the ordering should not be done.
4c8e44
4c8e44
This avoids a delay in recovering a failed remote connection. For example,
4c8e44
previously the ordering would look like:
4c8e44
4c8e44
   fence remote node -> implied stop of resource on remote -> stop connection
4c8e44
4c8e44
Now, the connection stop can proceed simultaneously with the remote node
4c8e44
fencing.
4c8e44
---
4c8e44
 lib/pacemaker/pcmk_sched_allocate.c | 11 +++++------
4c8e44
 1 file changed, 5 insertions(+), 6 deletions(-)
4c8e44
4c8e44
diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
4c8e44
index b7d1b48..9f82c00 100644
4c8e44
--- a/lib/pacemaker/pcmk_sched_allocate.c
4c8e44
+++ b/lib/pacemaker/pcmk_sched_allocate.c
4c8e44
@@ -2065,14 +2065,13 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
4c8e44
                                        pe_order_implies_first, data_set);
4c8e44
 
4c8e44
             } else if(state == remote_state_failed) {
4c8e44
-                /* We would only be here if the resource is
4c8e44
-                 * running on the remote node.  Since we have no
4c8e44
-                 * way to stop it, it is necessary to fence the
4c8e44
-                 * node.
4c8e44
+                /* The resource is active on the node, but since we don't have a
4c8e44
+                 * valid connection, the only way to stop the resource is by
4c8e44
+                 * fencing the node. There is no need to order the stop relative
4c8e44
+                 * to the remote connection, since the stop will become implied
4c8e44
+                 * by the fencing.
4c8e44
                  */
4c8e44
                 pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable");
4c8e44
-                order_action_then_stop(action, remote_rsc,
4c8e44
-                                       pe_order_implies_first, data_set);
4c8e44
 
4c8e44
             } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
4c8e44
                 /* State must be remote_state_unknown or remote_state_stopped.
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From 091c367369b892d26fe0de99d35cf521b6249d10 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Sat, 8 Jun 2019 16:51:20 -0500
4c8e44
Subject: [PATCH 12/13] Test: cts-scheduler: update regression tests for remote
4c8e44
 connection ordering change
4c8e44
4c8e44
Remote connection stops no longer have to wait for implied stops of resources
4c8e44
behind the connection.
4c8e44
4c8e44
Unchanged from before, if the remote connection stops are implied themselves,
4c8e44
they can be confirmed immediately without waiting for their host's fencing,
4c8e44
because remote connections have "requires" set to "quorum" rather than
4c8e44
"fencing".
4c8e44
---
4c8e44
 cts/scheduler/order-expired-failure.dot               |  1 -
4c8e44
 cts/scheduler/order-expired-failure.exp               |  6 +-----
4c8e44
 cts/scheduler/order-expired-failure.summary           |  2 +-
4c8e44
 cts/scheduler/remote-connection-unrecoverable.dot     |  2 --
4c8e44
 cts/scheduler/remote-connection-unrecoverable.exp     |  9 +--------
4c8e44
 cts/scheduler/remote-connection-unrecoverable.summary |  2 +-
4c8e44
 cts/scheduler/remote-fence-before-reconnect.dot       |  1 -
4c8e44
 cts/scheduler/remote-fence-before-reconnect.exp       |  6 +-----
4c8e44
 cts/scheduler/remote-fence-before-reconnect.summary   |  2 +-
4c8e44
 cts/scheduler/remote-recover-all.dot                  |  2 --
4c8e44
 cts/scheduler/remote-recover-all.exp                  | 12 ++----------
4c8e44
 cts/scheduler/remote-recover-all.summary              |  4 ++--
4c8e44
 cts/scheduler/remote-recover-no-resources.dot         |  1 -
4c8e44
 cts/scheduler/remote-recover-no-resources.exp         |  6 +-----
4c8e44
 cts/scheduler/remote-recover-no-resources.summary     |  2 +-
4c8e44
 cts/scheduler/remote-recover-unknown.dot              |  1 -
4c8e44
 cts/scheduler/remote-recover-unknown.exp              |  6 +-----
4c8e44
 cts/scheduler/remote-recover-unknown.summary          |  2 +-
4c8e44
 18 files changed, 14 insertions(+), 53 deletions(-)
4c8e44
4c8e44
diff --git a/cts/scheduler/order-expired-failure.dot b/cts/scheduler/order-expired-failure.dot
4c8e44
index 2e9963b..5c21d5d 100644
4c8e44
--- a/cts/scheduler/order-expired-failure.dot
4c8e44
+++ b/cts/scheduler/order-expired-failure.dot
4c8e44
@@ -4,7 +4,6 @@ digraph "g" {
4c8e44
 "compute-unfence-trigger-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "compute-unfence-trigger-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "compute-unfence-trigger_stop_0 overcloud-novacompute-1" -> "compute-unfence-trigger-clone_stopped_0" [ style = bold]
4c8e44
-"compute-unfence-trigger_stop_0 overcloud-novacompute-1" -> "overcloud-novacompute-1_stop_0 controller-1" [ style = bold]
4c8e44
 "compute-unfence-trigger_stop_0 overcloud-novacompute-1" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "ip-10.0.0.110_monitor_10000 controller-1" [ style=bold color="green" fontcolor="black"]
4c8e44
 "ip-10.0.0.110_start_0 controller-1" -> "ip-10.0.0.110_monitor_10000 controller-1" [ style = bold]
4c8e44
diff --git a/cts/scheduler/order-expired-failure.exp b/cts/scheduler/order-expired-failure.exp
4c8e44
index c476bc2..4a50493 100644
4c8e44
--- a/cts/scheduler/order-expired-failure.exp
4c8e44
+++ b/cts/scheduler/order-expired-failure.exp
4c8e44
@@ -9,11 +9,7 @@
4c8e44
         </downed>
4c8e44
       </rsc_op>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="220" operation="stop" operation_key="compute-unfence-trigger_stop_0" internal_operation_key="compute-unfence-trigger:1_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="1">
4c8e44
     <action_set>
4c8e44
diff --git a/cts/scheduler/order-expired-failure.summary b/cts/scheduler/order-expired-failure.summary
4c8e44
index c86bb91..2cf43ed 100644
4c8e44
--- a/cts/scheduler/order-expired-failure.summary
4c8e44
+++ b/cts/scheduler/order-expired-failure.summary
4c8e44
@@ -52,6 +52,7 @@ Transition Summary:
4c8e44
  * Stop       compute-unfence-trigger:1            ( overcloud-novacompute-1 )   due to node availability
4c8e44
 
4c8e44
 Executing cluster transition:
4c8e44
+ * Resource action: overcloud-novacompute-1 stop on controller-1
4c8e44
  * Resource action: stonith-fence_compute-fence-nova stop on controller-2
4c8e44
  * Fencing overcloud-novacompute-1 (reboot)
4c8e44
  * Cluster action:  clear_failcount for overcloud-novacompute-1 on controller-1
4c8e44
@@ -62,7 +63,6 @@ Executing cluster transition:
4c8e44
  * Resource action: ip-10.0.0.110   monitor=10000 on controller-1
4c8e44
  * Pseudo action:   compute-unfence-trigger_stop_0
4c8e44
  * Pseudo action:   compute-unfence-trigger-clone_stopped_0
4c8e44
- * Resource action: overcloud-novacompute-1 stop on controller-1
4c8e44
 Using the original execution date of: 2018-04-09 07:55:35Z
4c8e44
 
4c8e44
 Revised cluster status:
4c8e44
diff --git a/cts/scheduler/remote-connection-unrecoverable.dot b/cts/scheduler/remote-connection-unrecoverable.dot
4c8e44
index 7728425..1017d2b 100644
4c8e44
--- a/cts/scheduler/remote-connection-unrecoverable.dot
4c8e44
+++ b/cts/scheduler/remote-connection-unrecoverable.dot
4c8e44
@@ -7,14 +7,12 @@ digraph "g" {
4c8e44
 "remote1_stop_0 node1" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "rsc1_delete_0 remote1" -> "rsc1_start_0 node2" [ style = dashed]
4c8e44
 "rsc1_delete_0 remote1" [ style=dashed color="red" fontcolor="black"]
4c8e44
-"rsc1_monitor_0 node2" -> "remote1_stop_0 node1" [ style = bold]
4c8e44
 "rsc1_monitor_0 node2" -> "rsc1_start_0 node2" [ style = bold]
4c8e44
 "rsc1_monitor_0 node2" -> "rsc2-master_demote_0" [ style = bold]
4c8e44
 "rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black"]
4c8e44
 "rsc1_monitor_10000 node2" [ style=bold color="green" fontcolor="black"]
4c8e44
 "rsc1_start_0 node2" -> "rsc1_monitor_10000 node2" [ style = bold]
4c8e44
 "rsc1_start_0 node2" [ style=bold color="green" fontcolor="black"]
4c8e44
-"rsc1_stop_0 remote1" -> "remote1_stop_0 node1" [ style = bold]
4c8e44
 "rsc1_stop_0 remote1" -> "rsc1_delete_0 remote1" [ style = dashed]
4c8e44
 "rsc1_stop_0 remote1" -> "rsc1_start_0 node2" [ style = bold]
4c8e44
 "rsc1_stop_0 remote1" -> "rsc2-master_demote_0" [ style = bold]
4c8e44
diff --git a/cts/scheduler/remote-connection-unrecoverable.exp b/cts/scheduler/remote-connection-unrecoverable.exp
4c8e44
index 2c9357b..d57c106 100644
4c8e44
--- a/cts/scheduler/remote-connection-unrecoverable.exp
4c8e44
+++ b/cts/scheduler/remote-connection-unrecoverable.exp
4c8e44
@@ -5,14 +5,7 @@
4c8e44
         <attributes CRM_meta_timeout="20000"  reconnect_interval="60"/>
4c8e44
       </pseudo_event>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="6" operation="stop" operation_key="rsc1_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-      <trigger>
4c8e44
-        <rsc_op id="8" operation="monitor" operation_key="rsc1_monitor_0" on_node="node2" on_node_uuid="2"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="1">
4c8e44
     <action_set>
4c8e44
diff --git a/cts/scheduler/remote-connection-unrecoverable.summary b/cts/scheduler/remote-connection-unrecoverable.summary
4c8e44
index 23fa9ca..caff564 100644
4c8e44
--- a/cts/scheduler/remote-connection-unrecoverable.summary
4c8e44
+++ b/cts/scheduler/remote-connection-unrecoverable.summary
4c8e44
@@ -21,6 +21,7 @@ Transition Summary:
4c8e44
  * Stop       rsc2:0      (     Master node1 )   due to node availability
4c8e44
 
4c8e44
 Executing cluster transition:
4c8e44
+ * Pseudo action:   remote1_stop_0
4c8e44
  * Resource action: killer          stop on node2
4c8e44
  * Resource action: rsc1            monitor on node2
4c8e44
  * Fencing node1 (reboot)
4c8e44
@@ -29,7 +30,6 @@ Executing cluster transition:
4c8e44
  * Resource action: killer          monitor=60000 on node2
4c8e44
  * Pseudo action:   rsc1_stop_0
4c8e44
  * Pseudo action:   rsc2-master_demote_0
4c8e44
- * Pseudo action:   remote1_stop_0
4c8e44
  * Resource action: rsc1            start on node2
4c8e44
  * Pseudo action:   rsc2_demote_0
4c8e44
  * Pseudo action:   rsc2-master_demoted_0
4c8e44
diff --git a/cts/scheduler/remote-fence-before-reconnect.dot b/cts/scheduler/remote-fence-before-reconnect.dot
4c8e44
index 4ced43e..5812b7f 100644
4c8e44
--- a/cts/scheduler/remote-fence-before-reconnect.dot
4c8e44
+++ b/cts/scheduler/remote-fence-before-reconnect.dot
4c8e44
@@ -3,7 +3,6 @@
4c8e44
 "fake2_monitor_10000 c7auto1" [ style=bold color="green" fontcolor="black"]
4c8e44
 "fake2_start_0 c7auto1" -> "fake2_monitor_10000 c7auto1" [ style = bold]
4c8e44
 "fake2_start_0 c7auto1" [ style=bold color="green" fontcolor="black"]
4c8e44
-"fake2_stop_0 c7auto4" -> "c7auto4_stop_0 c7auto1" [ style = bold]
4c8e44
 "fake2_stop_0 c7auto4" -> "fake2_start_0 c7auto1" [ style = bold]
4c8e44
 "fake2_stop_0 c7auto4" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "stonith 'reboot' c7auto4" -> "fake2_start_0 c7auto1" [ style = bold]
4c8e44
diff --git a/cts/scheduler/remote-fence-before-reconnect.exp b/cts/scheduler/remote-fence-before-reconnect.exp
4c8e44
index f99d9ef..f506f85 100644
4c8e44
--- a/cts/scheduler/remote-fence-before-reconnect.exp
4c8e44
+++ b/cts/scheduler/remote-fence-before-reconnect.exp
4c8e44
@@ -9,11 +9,7 @@
4c8e44
         </downed>
4c8e44
       </rsc_op>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="13" operation="stop" operation_key="fake2_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="1">
4c8e44
     <action_set>
4c8e44
diff --git a/cts/scheduler/remote-fence-before-reconnect.summary b/cts/scheduler/remote-fence-before-reconnect.summary
4c8e44
index f61e18b..03eac20 100644
4c8e44
--- a/cts/scheduler/remote-fence-before-reconnect.summary
4c8e44
+++ b/cts/scheduler/remote-fence-before-reconnect.summary
4c8e44
@@ -17,9 +17,9 @@ Transition Summary:
4c8e44
  * Move       fake2       ( c7auto4 -> c7auto1 )  
4c8e44
 
4c8e44
 Executing cluster transition:
4c8e44
+ * Resource action: c7auto4         stop on c7auto1
4c8e44
  * Fencing c7auto4 (reboot)
4c8e44
  * Pseudo action:   fake2_stop_0
4c8e44
- * Resource action: c7auto4         stop on c7auto1
4c8e44
  * Resource action: fake2           start on c7auto1
4c8e44
  * Resource action: fake2           monitor=10000 on c7auto1
4c8e44
 
4c8e44
diff --git a/cts/scheduler/remote-recover-all.dot b/cts/scheduler/remote-recover-all.dot
4c8e44
index deed802..4128b10 100644
4c8e44
--- a/cts/scheduler/remote-recover-all.dot
4c8e44
+++ b/cts/scheduler/remote-recover-all.dot
4c8e44
@@ -19,7 +19,6 @@ digraph "g" {
4c8e44
 "galera_demote_0 galera-2" -> "galera_stop_0 galera-2" [ style = bold]
4c8e44
 "galera_demote_0 galera-2" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "galera_monitor_10000 galera-0" [ style=bold color="green" fontcolor="black"]
4c8e44
-"galera_stop_0 galera-2" -> "galera-2_stop_0 controller-1" [ style = bold]
4c8e44
 "galera_stop_0 galera-2" -> "galera-master_stopped_0" [ style = bold]
4c8e44
 "galera_stop_0 galera-2" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "haproxy-clone_stop_0" -> "haproxy-clone_stopped_0" [ style = bold]
4c8e44
@@ -60,7 +59,6 @@ digraph "g" {
4c8e44
 "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 messaging-0" [ style = bold]
4c8e44
 "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 messaging-2" [ style = bold]
4c8e44
 "rabbitmq_post_notify_stonith_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
-"rabbitmq_stop_0 messaging-1" -> "messaging-1_stop_0 controller-1" [ style = bold]
4c8e44
 "rabbitmq_stop_0 messaging-1" -> "rabbitmq-clone_stopped_0" [ style = bold]
4c8e44
 "rabbitmq_stop_0 messaging-1" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "redis-master_confirmed-post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
diff --git a/cts/scheduler/remote-recover-all.exp b/cts/scheduler/remote-recover-all.exp
4c8e44
index 8137ffb..0cb51f6 100644
4c8e44
--- a/cts/scheduler/remote-recover-all.exp
4c8e44
+++ b/cts/scheduler/remote-recover-all.exp
4c8e44
@@ -5,11 +5,7 @@
4c8e44
         <attributes CRM_meta_name="stop" CRM_meta_timeout="60000"  reconnect_interval="60"/>
4c8e44
       </pseudo_event>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="39" operation="stop" operation_key="rabbitmq_stop_0" internal_operation_key="rabbitmq:2_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="1">
4c8e44
     <action_set>
4c8e44
@@ -57,11 +53,7 @@
4c8e44
         <attributes CRM_meta_name="stop" CRM_meta_timeout="60000"  reconnect_interval="60"/>
4c8e44
       </pseudo_event>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="49" operation="stop" operation_key="galera_stop_0" internal_operation_key="galera:1_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="5" priority="1000000">
4c8e44
     <action_set>
4c8e44
diff --git a/cts/scheduler/remote-recover-all.summary b/cts/scheduler/remote-recover-all.summary
4c8e44
index 2ac0c6a..d095fdd 100644
4c8e44
--- a/cts/scheduler/remote-recover-all.summary
4c8e44
+++ b/cts/scheduler/remote-recover-all.summary
4c8e44
@@ -56,7 +56,9 @@ Transition Summary:
4c8e44
  * Move       stonith-fence_ipmilan-5254005bdbb5     ( controller-1 -> controller-2 )  
4c8e44
 
4c8e44
 Executing cluster transition:
4c8e44
+ * Pseudo action:   messaging-1_stop_0
4c8e44
  * Pseudo action:   galera-0_stop_0
4c8e44
+ * Pseudo action:   galera-2_stop_0
4c8e44
  * Pseudo action:   galera-master_demote_0
4c8e44
  * Pseudo action:   redis-master_pre_notify_stop_0
4c8e44
  * Resource action: stonith-fence_ipmilan-525400bbf613 stop on controller-0
4c8e44
@@ -94,7 +96,6 @@ Executing cluster transition:
4c8e44
  * Resource action: stonith-fence_ipmilan-525400b4f6bd monitor=60000 on controller-0
4c8e44
  * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2
4c8e44
  * Resource action: galera-0        monitor=20000 on controller-2
4c8e44
- * Pseudo action:   galera-2_stop_0
4c8e44
  * Resource action: rabbitmq        notify on messaging-2
4c8e44
  * Resource action: rabbitmq        notify on messaging-0
4c8e44
  * Pseudo action:   rabbitmq_notified_0
4c8e44
@@ -107,7 +108,6 @@ Executing cluster transition:
4c8e44
  * Resource action: ip-172.17.1.17  start on controller-2
4c8e44
  * Resource action: ip-172.17.4.11  start on controller-2
4c8e44
  * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2
4c8e44
- * Pseudo action:   messaging-1_stop_0
4c8e44
  * Pseudo action:   redis_notified_0
4c8e44
  * Resource action: ip-172.17.1.14  monitor=10000 on controller-2
4c8e44
  * Resource action: ip-172.17.1.17  monitor=10000 on controller-2
4c8e44
diff --git a/cts/scheduler/remote-recover-no-resources.dot b/cts/scheduler/remote-recover-no-resources.dot
4c8e44
index ef78aa6..a2f8ce0 100644
4c8e44
--- a/cts/scheduler/remote-recover-no-resources.dot
4c8e44
+++ b/cts/scheduler/remote-recover-no-resources.dot
4c8e44
@@ -45,7 +45,6 @@ digraph "g" {
4c8e44
 "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 messaging-0" [ style = bold]
4c8e44
 "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 messaging-2" [ style = bold]
4c8e44
 "rabbitmq_post_notify_stonith_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
-"rabbitmq_stop_0 messaging-1" -> "messaging-1_stop_0 controller-1" [ style = bold]
4c8e44
 "rabbitmq_stop_0 messaging-1" -> "rabbitmq-clone_stopped_0" [ style = bold]
4c8e44
 "rabbitmq_stop_0 messaging-1" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "redis-master_confirmed-post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
diff --git a/cts/scheduler/remote-recover-no-resources.exp b/cts/scheduler/remote-recover-no-resources.exp
4c8e44
index 8a67c11..90470fb 100644
4c8e44
--- a/cts/scheduler/remote-recover-no-resources.exp
4c8e44
+++ b/cts/scheduler/remote-recover-no-resources.exp
4c8e44
@@ -5,11 +5,7 @@
4c8e44
         <attributes CRM_meta_name="stop" CRM_meta_timeout="60000"  reconnect_interval="60"/>
4c8e44
       </pseudo_event>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="38" operation="stop" operation_key="rabbitmq_stop_0" internal_operation_key="rabbitmq:2_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="1">
4c8e44
     <action_set>
4c8e44
diff --git a/cts/scheduler/remote-recover-no-resources.summary b/cts/scheduler/remote-recover-no-resources.summary
4c8e44
index 89da784..18a989b 100644
4c8e44
--- a/cts/scheduler/remote-recover-no-resources.summary
4c8e44
+++ b/cts/scheduler/remote-recover-no-resources.summary
4c8e44
@@ -54,6 +54,7 @@ Transition Summary:
4c8e44
  * Move       stonith-fence_ipmilan-5254005bdbb5     ( controller-1 -> controller-2 )  
4c8e44
 
4c8e44
 Executing cluster transition:
4c8e44
+ * Pseudo action:   messaging-1_stop_0
4c8e44
  * Pseudo action:   galera-0_stop_0
4c8e44
  * Pseudo action:   galera-2_stop_0
4c8e44
  * Pseudo action:   redis-master_pre_notify_stop_0
4c8e44
@@ -92,7 +93,6 @@ Executing cluster transition:
4c8e44
  * Pseudo action:   ip-172.17.1.17_stop_0
4c8e44
  * Pseudo action:   ip-172.17.4.11_stop_0
4c8e44
  * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2
4c8e44
- * Pseudo action:   messaging-1_stop_0
4c8e44
  * Resource action: redis           notify on controller-0
4c8e44
  * Resource action: redis           notify on controller-2
4c8e44
  * Pseudo action:   redis-master_confirmed-post_notify_stopped_0
4c8e44
diff --git a/cts/scheduler/remote-recover-unknown.dot b/cts/scheduler/remote-recover-unknown.dot
4c8e44
index 5cd760b..29ab59f 100644
4c8e44
--- a/cts/scheduler/remote-recover-unknown.dot
4c8e44
+++ b/cts/scheduler/remote-recover-unknown.dot
4c8e44
@@ -46,7 +46,6 @@ digraph "g" {
4c8e44
 "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 messaging-0" [ style = bold]
4c8e44
 "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 messaging-2" [ style = bold]
4c8e44
 "rabbitmq_post_notify_stonith_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
-"rabbitmq_stop_0 messaging-1" -> "messaging-1_stop_0 controller-1" [ style = bold]
4c8e44
 "rabbitmq_stop_0 messaging-1" -> "rabbitmq-clone_stopped_0" [ style = bold]
4c8e44
 "rabbitmq_stop_0 messaging-1" [ style=bold color="green" fontcolor="orange"]
4c8e44
 "redis-master_confirmed-post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"]
4c8e44
diff --git a/cts/scheduler/remote-recover-unknown.exp b/cts/scheduler/remote-recover-unknown.exp
4c8e44
index ac6f004..82cb65f7 100644
4c8e44
--- a/cts/scheduler/remote-recover-unknown.exp
4c8e44
+++ b/cts/scheduler/remote-recover-unknown.exp
4c8e44
@@ -5,11 +5,7 @@
4c8e44
         <attributes CRM_meta_name="stop" CRM_meta_timeout="60000"  reconnect_interval="60"/>
4c8e44
       </pseudo_event>
4c8e44
     </action_set>
4c8e44
-    <inputs>
4c8e44
-      <trigger>
4c8e44
-        <pseudo_event id="39" operation="stop" operation_key="rabbitmq_stop_0" internal_operation_key="rabbitmq:2_stop_0"/>
4c8e44
-      </trigger>
4c8e44
-    </inputs>
4c8e44
+    <inputs/>
4c8e44
   </synapse>
4c8e44
   <synapse id="1">
4c8e44
     <action_set>
4c8e44
diff --git a/cts/scheduler/remote-recover-unknown.summary b/cts/scheduler/remote-recover-unknown.summary
4c8e44
index 2c60713..4d7a411 100644
4c8e44
--- a/cts/scheduler/remote-recover-unknown.summary
4c8e44
+++ b/cts/scheduler/remote-recover-unknown.summary
4c8e44
@@ -55,6 +55,7 @@ Transition Summary:
4c8e44
  * Move       stonith-fence_ipmilan-5254005bdbb5     ( controller-1 -> controller-2 )  
4c8e44
 
4c8e44
 Executing cluster transition:
4c8e44
+ * Pseudo action:   messaging-1_stop_0
4c8e44
  * Pseudo action:   galera-0_stop_0
4c8e44
  * Pseudo action:   galera-2_stop_0
4c8e44
  * Pseudo action:   redis-master_pre_notify_stop_0
4c8e44
@@ -94,7 +95,6 @@ Executing cluster transition:
4c8e44
  * Pseudo action:   ip-172.17.1.17_stop_0
4c8e44
  * Pseudo action:   ip-172.17.4.11_stop_0
4c8e44
  * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2
4c8e44
- * Pseudo action:   messaging-1_stop_0
4c8e44
  * Resource action: redis           notify on controller-0
4c8e44
  * Resource action: redis           notify on controller-2
4c8e44
  * Pseudo action:   redis-master_confirmed-post_notify_stopped_0
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
4c8e44
From 9a5f7952c921f7f8eea3c7b0af711df2995a4e60 Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Fri, 7 Jun 2019 17:11:27 -0500
4c8e44
Subject: [PATCH 13/13] Low: libpe_status: don't add /var/log mount to bundles
4c8e44
 if user did
4c8e44
4c8e44
---
4c8e44
 lib/pengine/bundle.c | 10 ++++++++--
4c8e44
 1 file changed, 8 insertions(+), 2 deletions(-)
4c8e44
4c8e44
diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c
4c8e44
index b223f03..060e73a 100644
4c8e44
--- a/lib/pengine/bundle.c
4c8e44
+++ b/lib/pengine/bundle.c
4c8e44
@@ -1027,6 +1027,7 @@ pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set)
4c8e44
     xmlNode *xml_obj = NULL;
4c8e44
     xmlNode *xml_resource = NULL;
4c8e44
     pe__bundle_variant_data_t *bundle_data = NULL;
4c8e44
+    bool need_log_mount = TRUE;
4c8e44
 
4c8e44
     CRM_ASSERT(rsc != NULL);
4c8e44
     pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
4c8e44
@@ -1151,6 +1152,9 @@ pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set)
4c8e44
 
4c8e44
         if (source && target) {
4c8e44
             mount_add(bundle_data, source, target, options, flags);
4c8e44
+            if (strcmp(target, "/var/log") == 0) {
4c8e44
+                need_log_mount = FALSE;
4c8e44
+            }
4c8e44
         } else {
4c8e44
             pe_err("Invalid mount directive %s", ID(xml_child));
4c8e44
         }
4c8e44
@@ -1253,8 +1257,10 @@ pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set)
4c8e44
         mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION,
4c8e44
                   DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none);
4c8e44
 
4c8e44
-        mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL,
4c8e44
-                  pe__bundle_mount_subdir);
4c8e44
+        if (need_log_mount) {
4c8e44
+            mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL,
4c8e44
+                      pe__bundle_mount_subdir);
4c8e44
+        }
4c8e44
 
4c8e44
         port = calloc(1, sizeof(pe__bundle_port_t));
4c8e44
         if(bundle_data->control_port) {
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44