26ad17
From 4e190ebc5460563bae2586b28afb0415f2eb3d1a Mon Sep 17 00:00:00 2001
26ad17
From: Ken Gaillot <kgaillot@redhat.com>
26ad17
Date: Wed, 1 Jul 2020 20:38:16 -0500
26ad17
Subject: [PATCH 1/4] Test: CTS: libqb shared memory creates directories now
26ad17
26ad17
... so use "rm -rf" instead of "rm -f"
26ad17
---
26ad17
 cts/CTS.py.in    | 2 +-
26ad17
 cts/CTSaudits.py | 2 +-
26ad17
 2 files changed, 2 insertions(+), 2 deletions(-)
26ad17
26ad17
diff --git a/cts/CTS.py.in b/cts/CTS.py.in
26ad17
index c418318..091bb1f 100644
26ad17
--- a/cts/CTS.py.in
26ad17
+++ b/cts/CTS.py.in
26ad17
@@ -546,7 +546,7 @@ class ClusterManager(UserDict):
26ad17
         if self.rsh(node, self.templates["StopCmd"]) == 0:
26ad17
             # Make sure we can continue even if corosync leaks
26ad17
             # fdata-* is the old name
26ad17
-            #self.rsh(node, "rm -f /dev/shm/qb-* /dev/shm/fdata-*")
26ad17
+            #self.rsh(node, "rm -rf /dev/shm/qb-* /dev/shm/fdata-*")
26ad17
             self.ShouldBeStatus[node] = "down"
26ad17
             self.cluster_stable(self.Env["DeadTime"])
26ad17
             return 1
26ad17
diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py
26ad17
index b7e0827..cc82171 100755
26ad17
--- a/cts/CTSaudits.py
26ad17
+++ b/cts/CTSaudits.py
26ad17
@@ -233,7 +233,7 @@ class FileAudit(ClusterAudit):
26ad17
                     for line in lsout:
26ad17
                         self.CM.debug("ps[%s]: %s" % (node, line))
26ad17
 
26ad17
-                    self.CM.rsh(node, "rm -f /dev/shm/qb-*")
26ad17
+                    self.CM.rsh(node, "rm -rf /dev/shm/qb-*")
26ad17
 
26ad17
             else:
26ad17
                 self.CM.debug("Skipping %s" % node)
26ad17
-- 
26ad17
1.8.3.1
26ad17
26ad17
26ad17
From 4316507d50d51c7864d8d34aac1da31a232b9f42 Mon Sep 17 00:00:00 2001
26ad17
From: Ken Gaillot <kgaillot@redhat.com>
26ad17
Date: Thu, 2 Jul 2020 16:09:20 -0500
26ad17
Subject: [PATCH 2/4] Test: CTS: ignore error logged by recent pcs versions
26ad17
26ad17
... because it is expected when a node is fenced, and we should already see
26ad17
pacemaker errors if a node is unexpectedly fenced
26ad17
---
26ad17
 cts/patterns.py | 4 ++++
26ad17
 1 file changed, 4 insertions(+)
26ad17
26ad17
diff --git a/cts/patterns.py b/cts/patterns.py
26ad17
index 96d6471..7eed90c 100644
26ad17
--- a/cts/patterns.py
26ad17
+++ b/cts/patterns.py
26ad17
@@ -21,6 +21,10 @@ class BasePatterns(object):
26ad17
 
26ad17
             # Logging bug in some versions of libvirtd
26ad17
             r"libvirtd.*: internal error: Failed to parse PCI config address",
26ad17
+
26ad17
+            # pcs can log this when node is fenced, but fencing is OK in some
26ad17
+            # tests (and we will catch it in pacemaker logs when not OK)
26ad17
+            r"pcs.daemon:No response from: .* request: get_configs, error:",
26ad17
         ]
26ad17
         self.BadNews = []
26ad17
         self.components = {}
26ad17
-- 
26ad17
1.8.3.1
26ad17
26ad17
26ad17
From 598ae0f65bad6ed16978d1ab6e24e8e358e0a1a4 Mon Sep 17 00:00:00 2001
26ad17
From: Ken Gaillot <kgaillot@redhat.com>
26ad17
Date: Thu, 2 Jul 2020 20:40:00 -0500
26ad17
Subject: [PATCH 3/4] Low: libcrmcommon: avoid assertion on controller protocol
26ad17
 errors
26ad17
26ad17
Previously, after a protocol error, we would set reply to NULL and then try to
26ad17
call crm_element_value() on it, which would log an assertion.
26ad17
---
26ad17
 lib/common/ipc_controld.c | 46 ++++++++++++++++++++++------------------------
26ad17
 1 file changed, 22 insertions(+), 24 deletions(-)
26ad17
26ad17
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
26ad17
index 5917cc5..22cb9e0 100644
26ad17
--- a/lib/common/ipc_controld.c
26ad17
+++ b/lib/common/ipc_controld.c
26ad17
@@ -187,53 +187,51 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
26ad17
         crm_debug("Unrecognizable controller message: invalid message type '%s'",
26ad17
                   crm_str(value));
26ad17
         status = CRM_EX_PROTOCOL;
26ad17
-        reply = NULL;
26ad17
+        goto done;
26ad17
     }
26ad17
 
26ad17
     if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) {
26ad17
         crm_debug("Unrecognizable controller message: no reference");
26ad17
         status = CRM_EX_PROTOCOL;
26ad17
-        reply = NULL;
26ad17
+        goto done;
26ad17
     }
26ad17
 
26ad17
     value = crm_element_value(reply, F_CRM_TASK);
26ad17
     if (value == NULL) {
26ad17
         crm_debug("Unrecognizable controller message: no command name");
26ad17
         status = CRM_EX_PROTOCOL;
26ad17
-        reply = NULL;
26ad17
+        goto done;
26ad17
     }
26ad17
 
26ad17
     // Parse useful info from reply
26ad17
 
26ad17
-    if (reply != NULL) {
26ad17
-        reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION);
26ad17
-        reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM);
26ad17
-        msg_data = get_message_xml(reply, F_CRM_DATA);
26ad17
+    reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION);
26ad17
+    reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM);
26ad17
+    msg_data = get_message_xml(reply, F_CRM_DATA);
26ad17
 
26ad17
-        if (!strcmp(value, CRM_OP_REPROBE)) {
26ad17
-            reply_data.reply_type = pcmk_controld_reply_reprobe;
26ad17
+    if (!strcmp(value, CRM_OP_REPROBE)) {
26ad17
+        reply_data.reply_type = pcmk_controld_reply_reprobe;
26ad17
 
26ad17
-        } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
26ad17
-            set_node_info_data(&reply_data, msg_data);
26ad17
+    } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
26ad17
+        set_node_info_data(&reply_data, msg_data);
26ad17
 
26ad17
-        } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
26ad17
-            reply_data.reply_type = pcmk_controld_reply_resource;
26ad17
-            reply_data.data.resource.node_state = msg_data;
26ad17
+    } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
26ad17
+        reply_data.reply_type = pcmk_controld_reply_resource;
26ad17
+        reply_data.data.resource.node_state = msg_data;
26ad17
 
26ad17
-        } else if (!strcmp(value, CRM_OP_PING)) {
26ad17
-            set_ping_data(&reply_data, msg_data);
26ad17
+    } else if (!strcmp(value, CRM_OP_PING)) {
26ad17
+        set_ping_data(&reply_data, msg_data);
26ad17
 
26ad17
-        } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
26ad17
-            set_nodes_data(&reply_data, msg_data);
26ad17
+    } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
26ad17
+        set_nodes_data(&reply_data, msg_data);
26ad17
 
26ad17
-        } else {
26ad17
-            crm_debug("Unrecognizable controller message: unknown command '%s'",
26ad17
-                      value);
26ad17
-            status = CRM_EX_PROTOCOL;
26ad17
-            reply = NULL;
26ad17
-        }
26ad17
+    } else {
26ad17
+        crm_debug("Unrecognizable controller message: unknown command '%s'",
26ad17
+                  value);
26ad17
+        status = CRM_EX_PROTOCOL;
26ad17
     }
26ad17
 
26ad17
+done:
26ad17
     pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
26ad17
 
26ad17
     // Free any reply data that was allocated
26ad17
-- 
26ad17
1.8.3.1
26ad17
26ad17
26ad17
From 5ae4101b60f8c0cd96eb2097a65a59aaa1750d73 Mon Sep 17 00:00:00 2001
26ad17
From: Ken Gaillot <kgaillot@redhat.com>
26ad17
Date: Fri, 17 Jul 2020 17:20:23 -0500
26ad17
Subject: [PATCH 4/4] Log: fencer: don't log assertion if unable to create full
26ad17
 request reply
26ad17
26ad17
Previously, we would log an assertion and a warning if asked to create a reply
26ad17
to a NULL request. However, there is a valid sequence in which this can happen:
26ad17
26ad17
- Some nodes are up and some down at cluster start-up
26ad17
- One node is elected DC and schedules fencing of the down nodes
26ad17
- Fencing is initiated for one of the down nodes
26ad17
- One of the other down nodes comes up and is elected DC
26ad17
- The fencing result comes back and all peers (including new DC) are notified
26ad17
- New DC tries to create a notification for its client (the controller)
26ad17
  but doesn't know anything about the initial request
26ad17
26ad17
For now, just log a warning and drop the assertion. Longer term, maybe we
26ad17
should synchronize in-flight request information when a fencer joins the
26ad17
process group.
26ad17
---
26ad17
 daemons/fenced/fenced_commands.c | 55 +++++++++++++++++++++++-----------------
26ad17
 1 file changed, 32 insertions(+), 23 deletions(-)
26ad17
26ad17
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
26ad17
index 05c5437..9c27d61 100644
26ad17
--- a/daemons/fenced/fenced_commands.c
26ad17
+++ b/daemons/fenced/fenced_commands.c
26ad17
@@ -2336,22 +2336,8 @@ stonith_fence(xmlNode * msg)
26ad17
 xmlNode *
26ad17
 stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
26ad17
 {
26ad17
-    int lpc = 0;
26ad17
     xmlNode *reply = NULL;
26ad17
 
26ad17
-    const char *name = NULL;
26ad17
-    const char *value = NULL;
26ad17
-
26ad17
-    const char *names[] = {
26ad17
-        F_STONITH_OPERATION,
26ad17
-        F_STONITH_CALLID,
26ad17
-        F_STONITH_CLIENTID,
26ad17
-        F_STONITH_CLIENTNAME,
26ad17
-        F_STONITH_REMOTE_OP_ID,
26ad17
-        F_STONITH_CALLOPTS
26ad17
-    };
26ad17
-
26ad17
-    crm_trace("Creating a basic reply");
26ad17
     reply = create_xml_node(NULL, T_STONITH_REPLY);
26ad17
 
26ad17
     crm_xml_add(reply, "st_origin", __FUNCTION__);
26ad17
@@ -2359,16 +2345,39 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
26ad17
     crm_xml_add(reply, "st_output", output);
26ad17
     crm_xml_add_int(reply, F_STONITH_RC, rc);
26ad17
 
26ad17
-    CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply);
26ad17
-    for (lpc = 0; lpc < DIMOF(names); lpc++) {
26ad17
-        name = names[lpc];
26ad17
-        value = crm_element_value(request, name);
26ad17
-        crm_xml_add(reply, name, value);
26ad17
-    }
26ad17
+    if (request == NULL) {
26ad17
+        /* Most likely, this is the result of a stonith operation that was
26ad17
+         * initiated before we came up. Unfortunately that means we lack enough
26ad17
+         * information to provide clients with a full result.
26ad17
+         *
26ad17
+         * @TODO Maybe synchronize this information at start-up?
26ad17
+         */
26ad17
+        crm_warn("Missing request information for client notifications for "
26ad17
+                 "operation with result %d (initiated before we came up?)", rc);
26ad17
 
26ad17
-    if (data != NULL) {
26ad17
-        crm_trace("Attaching reply output");
26ad17
-        add_message_xml(reply, F_STONITH_CALLDATA, data);
26ad17
+    } else {
26ad17
+        const char *name = NULL;
26ad17
+        const char *value = NULL;
26ad17
+
26ad17
+        const char *names[] = {
26ad17
+            F_STONITH_OPERATION,
26ad17
+            F_STONITH_CALLID,
26ad17
+            F_STONITH_CLIENTID,
26ad17
+            F_STONITH_CLIENTNAME,
26ad17
+            F_STONITH_REMOTE_OP_ID,
26ad17
+            F_STONITH_CALLOPTS
26ad17
+        };
26ad17
+
26ad17
+        crm_trace("Creating a result reply with%s reply output (rc=%d)",
26ad17
+                  (data? "" : "out"), rc);
26ad17
+        for (int lpc = 0; lpc < DIMOF(names); lpc++) {
26ad17
+            name = names[lpc];
26ad17
+            value = crm_element_value(request, name);
26ad17
+            crm_xml_add(reply, name, value);
26ad17
+        }
26ad17
+        if (data != NULL) {
26ad17
+            add_message_xml(reply, F_STONITH_CALLDATA, data);
26ad17
+        }
26ad17
     }
26ad17
     return reply;
26ad17
 }
26ad17
-- 
26ad17
1.8.3.1
26ad17