diff --git a/.gitignore b/.gitignore index 66cdb0e..e31915e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -SOURCES/pacemaker-dc6eb4362.tar.gz +SOURCES/pacemaker-a3f44794f.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index 315149b..f5f737d 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,2 @@ 2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -24ccc9f234896595a1f7a8baec22652620fd609f SOURCES/pacemaker-dc6eb4362.tar.gz +b16198db5f86857ba8bc0ebd04fd386da360478a SOURCES/pacemaker-a3f44794f.tar.gz diff --git a/SOURCES/001-stonith-enabled.patch b/SOURCES/001-stonith-enabled.patch deleted file mode 100644 index ebeb650..0000000 --- a/SOURCES/001-stonith-enabled.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 243139b2ec0f6b17877a4e7f651fc3f70f76b11a Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Fri, 6 May 2022 15:23:43 +0100 -Subject: [PATCH 1/2] fenced: Don't ignore CIB updates if stonith-enabled=false - -Fixes: T378 ---- - daemons/fenced/pacemaker-fenced.c | 23 +++-------------------- - 1 file changed, 3 insertions(+), 20 deletions(-) - -diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c -index caab7de83..dadd187b6 100644 ---- a/daemons/fenced/pacemaker-fenced.c -+++ b/daemons/fenced/pacemaker-fenced.c -@@ -1136,11 +1136,8 @@ static void - update_cib_cache_cb(const char *event, xmlNode * msg) - { - int rc = pcmk_ok; -- xmlNode *stonith_enabled_xml = NULL; -- static gboolean stonith_enabled_saved = TRUE; - long timeout_ms_saved = stonith_watchdog_timeout_ms; - gboolean need_full_refresh = FALSE; -- bool value = false; - - if(!have_cib_devices) { - crm_trace("Skipping updates until we get a full dump"); -@@ -1191,32 +1188,18 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - return; - } - CRM_ASSERT(local_cib != NULL); -- 
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ -+ need_full_refresh = TRUE; - } - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - -- stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", -- local_cib, LOG_NEVER); -- if (pcmk__xe_get_bool_attr(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE, &value) == pcmk_rc_ok && !value) { -- crm_trace("Ignoring CIB updates while fencing is disabled"); -- stonith_enabled_saved = FALSE; -- -- } else if (stonith_enabled_saved == FALSE) { -- crm_info("Updating fencing device and topology lists " -- "now that fencing is enabled"); -- stonith_enabled_saved = TRUE; -- need_full_refresh = TRUE; -- -- } else { -- if (timeout_ms_saved != stonith_watchdog_timeout_ms) { -+ if (timeout_ms_saved != stonith_watchdog_timeout_ms) { - need_full_refresh = TRUE; -- } else { -+ } else { - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - watchdog_device_update(); -- } - } - - if (need_full_refresh) { --- -2.31.1 - - -From c600ef49022e7473acbe121fae50a0c1aa2d7c03 Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Thu, 9 Jun 2022 11:08:43 +0100 -Subject: [PATCH 2/2] Also don't check for stonith-disabled in - update_stonith_watchdog_timeout_ms - ---- - daemons/fenced/pacemaker-fenced.c | 34 +++++++++++-------------------- - 1 file changed, 12 insertions(+), 22 deletions(-) - -diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c -index dadd187b6..ec42d5bc2 100644 ---- a/daemons/fenced/pacemaker-fenced.c -+++ b/daemons/fenced/pacemaker-fenced.c -@@ -643,31 +643,21 @@ watchdog_device_update(void) - static void - update_stonith_watchdog_timeout_ms(xmlNode *cib) - { -- xmlNode *stonith_enabled_xml = NULL; -- bool stonith_enabled = false; -- int rc = pcmk_rc_ok; - long timeout_ms = 0; -+ xmlNode *stonith_watchdog_xml = NULL; -+ const char *value = NULL; - -- stonith_enabled_xml = 
get_xpath_object("//nvpair[@name='stonith-enabled']", -- cib, LOG_NEVER); -- rc = pcmk__xe_get_bool_attr(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE, &stonith_enabled); -- -- if (rc != pcmk_rc_ok || stonith_enabled) { -- xmlNode *stonith_watchdog_xml = NULL; -- const char *value = NULL; -- -- stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", -- cib, LOG_NEVER); -- if (stonith_watchdog_xml) { -- value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); -- } -- if (value) { -- timeout_ms = crm_get_msec(value); -- } -+ stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", -+ cib, LOG_NEVER); -+ if (stonith_watchdog_xml) { -+ value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); -+ } -+ if (value) { -+ timeout_ms = crm_get_msec(value); -+ } - -- if (timeout_ms < 0) { -- timeout_ms = pcmk__auto_watchdog_timeout(); -- } -+ if (timeout_ms < 0) { -+ timeout_ms = pcmk__auto_watchdog_timeout(); - } - - stonith_watchdog_timeout_ms = timeout_ms; --- -2.31.1 - diff --git a/SOURCES/001-sync-points.patch b/SOURCES/001-sync-points.patch new file mode 100644 index 0000000..c034c78 --- /dev/null +++ b/SOURCES/001-sync-points.patch @@ -0,0 +1,2429 @@ +From de05f6b52c667155d262ceeb541dc1041d079d71 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 11:36:58 -0400 +Subject: [PATCH 01/26] Refactor: tools: Use a uint32_t for attr_options. 
+ +--- + tools/attrd_updater.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index d90567a..b85a281 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -47,7 +47,7 @@ struct { + gchar *attr_node; + gchar *attr_set; + char *attr_value; +- int attr_options; ++ uint32_t attr_options; + gboolean query_all; + gboolean quiet; + } options = { +-- +2.31.1 + +From c6637520b474d44553ade52c0dbe9e36e873135f Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 21 Oct 2022 14:31:16 -0400 +Subject: [PATCH 02/26] Refactor: libcrmcommon: Make pcmk__xe_match more + broadly useful. + +If attr_v is NULL, simply return the first node with a matching name. +--- + lib/common/xml.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/lib/common/xml.c b/lib/common/xml.c +index 036dd87..ac6f46a 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -510,7 +510,7 @@ find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find) + * \param[in] parent XML element to search + * \param[in] node_name If not NULL, only match children of this type + * \param[in] attr_n If not NULL, only match children with an attribute +- * of this name and a value of \p attr_v ++ * of this name. 
+ * \param[in] attr_v If \p attr_n and this are not NULL, only match children + * with an attribute named \p attr_n and this value + * +@@ -520,14 +520,16 @@ xmlNode * + pcmk__xe_match(const xmlNode *parent, const char *node_name, + const char *attr_n, const char *attr_v) + { +- /* ensure attr_v specified when attr_n is */ +- CRM_CHECK(attr_n == NULL || attr_v != NULL, return NULL); ++ CRM_CHECK(parent != NULL, return NULL); ++ CRM_CHECK(attr_v == NULL || attr_n != NULL, return NULL); + + for (xmlNode *child = pcmk__xml_first_child(parent); child != NULL; + child = pcmk__xml_next(child)) { + if (pcmk__str_eq(node_name, (const char *) (child->name), + pcmk__str_null_matches) +- && ((attr_n == NULL) || attr_matches(child, attr_n, attr_v))) { ++ && ((attr_n == NULL) || ++ (attr_v == NULL && xmlHasProp(child, (pcmkXmlStr) attr_n)) || ++ (attr_v != NULL && attr_matches(child, attr_n, attr_v)))) { + return child; + } + } +-- +2.31.1 + +From dd520579484c6ec091f7fbb550347941302dad0e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 21 Oct 2022 14:32:46 -0400 +Subject: [PATCH 03/26] Tests: libcrmcommon: Add tests for pcmk__xe_match. + +--- + lib/common/tests/xml/Makefile.am | 3 +- + lib/common/tests/xml/pcmk__xe_match_test.c | 105 +++++++++++++++++++++ + 2 files changed, 107 insertions(+), 1 deletion(-) + create mode 100644 lib/common/tests/xml/pcmk__xe_match_test.c + +diff --git a/lib/common/tests/xml/Makefile.am b/lib/common/tests/xml/Makefile.am +index 342ca07..0ccdcc3 100644 +--- a/lib/common/tests/xml/Makefile.am ++++ b/lib/common/tests/xml/Makefile.am +@@ -11,6 +11,7 @@ include $(top_srcdir)/mk/tap.mk + include $(top_srcdir)/mk/unittest.mk + + # Add "_test" to the end of all test program names to simplify .gitignore. 
+-check_PROGRAMS = pcmk__xe_foreach_child_test ++check_PROGRAMS = pcmk__xe_foreach_child_test \ ++ pcmk__xe_match_test + + TESTS = $(check_PROGRAMS) +diff --git a/lib/common/tests/xml/pcmk__xe_match_test.c b/lib/common/tests/xml/pcmk__xe_match_test.c +new file mode 100644 +index 0000000..fd529ba +--- /dev/null ++++ b/lib/common/tests/xml/pcmk__xe_match_test.c +@@ -0,0 +1,105 @@ ++/* ++ * Copyright 2022 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++ ++const char *str1 = ++ "\n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ ""; ++ ++static void ++bad_input(void **state) { ++ xmlNode *xml = string2xml(str1); ++ ++ assert_null(pcmk__xe_match(NULL, NULL, NULL, NULL)); ++ assert_null(pcmk__xe_match(NULL, NULL, NULL, "attrX")); ++ ++ free_xml(xml); ++} ++ ++static void ++not_found(void **state) { ++ xmlNode *xml = string2xml(str1); ++ ++ /* No node with an attrX attribute */ ++ assert_null(pcmk__xe_match(xml, NULL, "attrX", NULL)); ++ /* No nodeX node */ ++ assert_null(pcmk__xe_match(xml, "nodeX", NULL, NULL)); ++ /* No nodeA node with attrX */ ++ assert_null(pcmk__xe_match(xml, "nodeA", "attrX", NULL)); ++ /* No nodeA node with attrA=XYZ */ ++ assert_null(pcmk__xe_match(xml, "nodeA", "attrA", "XYZ")); ++ ++ free_xml(xml); ++} ++ ++static void ++find_attrB(void **state) { ++ xmlNode *xml = string2xml(str1); ++ xmlNode *result = NULL; ++ ++ /* Find the first node with attrB */ ++ result = pcmk__xe_match(xml, NULL, "attrB", NULL); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "3"); ++ ++ /* Find the first nodeB with 
attrB */ ++ result = pcmk__xe_match(xml, "nodeB", "attrB", NULL); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "5"); ++ ++ free_xml(xml); ++} ++ ++static void ++find_attrA_matching(void **state) { ++ xmlNode *xml = string2xml(str1); ++ xmlNode *result = NULL; ++ ++ /* Find attrA=456 */ ++ result = pcmk__xe_match(xml, NULL, "attrA", "456"); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "2"); ++ ++ /* Find a nodeB with attrA=123 */ ++ result = pcmk__xe_match(xml, "nodeB", "attrA", "123"); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "4"); ++ ++ free_xml(xml); ++} ++ ++PCMK__UNIT_TEST(NULL, NULL, ++ cmocka_unit_test(bad_input), ++ cmocka_unit_test(not_found), ++ cmocka_unit_test(find_attrB), ++ cmocka_unit_test(find_attrA_matching)); +-- +2.31.1 + +From 03af8498d8aaf21c509cec9b0ec4b78475da41d7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:22:26 -0400 +Subject: [PATCH 04/26] Feature: libcrmcommon: Add attrd options for specifying + a sync point. 
+ +--- + include/crm/common/attrd_internal.h | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index f7033ad..389be48 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -16,13 +16,15 @@ extern "C" { + + // Options for clients to use with functions below + enum pcmk__node_attr_opts { +- pcmk__node_attr_none = 0, +- pcmk__node_attr_remote = (1 << 0), +- pcmk__node_attr_private = (1 << 1), +- pcmk__node_attr_pattern = (1 << 2), +- pcmk__node_attr_value = (1 << 3), +- pcmk__node_attr_delay = (1 << 4), +- pcmk__node_attr_perm = (1 << 5), ++ pcmk__node_attr_none = 0, ++ pcmk__node_attr_remote = (1 << 0), ++ pcmk__node_attr_private = (1 << 1), ++ pcmk__node_attr_pattern = (1 << 2), ++ pcmk__node_attr_value = (1 << 3), ++ pcmk__node_attr_delay = (1 << 4), ++ pcmk__node_attr_perm = (1 << 5), ++ pcmk__node_attr_sync_local = (1 << 6), ++ pcmk__node_attr_sync_cluster = (1 << 7), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +-- +2.31.1 + +From 5c8825293ee21d3823bdcd01b0df9c7d39739940 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:23:09 -0400 +Subject: [PATCH 05/26] Feature: libcrmcommon: Add sync point to IPC request + XML. + +If one of the pcmk__node_attr_sync_* options is provided, add an +attribute to the request XML. This will later be inspected by the +server to determine when to send the reply to the client. 
+--- + include/crm/common/options_internal.h | 2 ++ + include/crm_internal.h | 1 + + lib/common/ipc_attrd.c | 6 ++++++ + 3 files changed, 9 insertions(+) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index b153c67..f29ba3f 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -145,9 +145,11 @@ bool pcmk__valid_sbd_timeout(const char *value); + #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" + + // Constants for enumerated values for various options ++#define PCMK__VALUE_CLUSTER "cluster" + #define PCMK__VALUE_CUSTOM "custom" + #define PCMK__VALUE_FENCING "fencing" + #define PCMK__VALUE_GREEN "green" ++#define PCMK__VALUE_LOCAL "local" + #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" + #define PCMK__VALUE_NONE "none" + #define PCMK__VALUE_NOTHING "nothing" +diff --git a/include/crm_internal.h b/include/crm_internal.h +index e6e2e96..08193c3 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -71,6 +71,7 @@ + #define PCMK__XA_ATTR_RESOURCE "attr_resource" + #define PCMK__XA_ATTR_SECTION "attr_section" + #define PCMK__XA_ATTR_SET "attr_set" ++#define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point" + #define PCMK__XA_ATTR_USER "attr_user" + #define PCMK__XA_ATTR_UUID "attr_key" + #define PCMK__XA_ATTR_VALUE "attr_value" +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index f6cfbc4..4606509 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -431,6 +431,12 @@ populate_update_op(xmlNode *op, const char *node, const char *name, const char * + pcmk_is_set(options, pcmk__node_attr_remote)); + crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, + pcmk_is_set(options, pcmk__node_attr_private)); ++ ++ if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { ++ crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); ++ } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { ++ crm_xml_add(op, 
PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); ++ } + } + + int +-- +2.31.1 + +From e2b3fee630caf0846ca8bbffcef4d6d2acfd32a5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:26:28 -0400 +Subject: [PATCH 06/26] Feature: tools: Add --wait= parameter to attrd_updater. + +This command line option is used to specify the sync point to use. For +the moment, it has no effect. +--- + tools/attrd_updater.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index b85a281..c4779a6 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -97,6 +97,22 @@ section_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError + return TRUE; + } + ++static gboolean ++wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { ++ if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ return TRUE; ++ } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); ++ return TRUE; ++ } else { ++ g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, ++ "--wait= must be one of 'no', 'local', 'cluster'"); ++ return FALSE; ++ } ++} ++ + #define INDENT " " + + static GOptionEntry required_entries[] = { +@@ -175,6 +191,14 @@ static GOptionEntry addl_entries[] = { + "If this creates a new attribute, never write the attribute to CIB", + NULL }, + ++ { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, ++ "Wait for some event to occur before returning. 
Values are 'no' (wait\n" ++ INDENT "only for the attribute daemon to acknowledge the request) or\n" ++ INDENT "'local' (wait until the change has propagated to where a local\n" ++ INDENT "query will return the request value, or the value set by a\n" ++ INDENT "later request). Default is 'no'.", ++ "UNTIL" }, ++ + { NULL } + }; + +-- +2.31.1 + +From 52d51ab41b2f00e72724ab39835b3db86605a96b Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:40:13 -0400 +Subject: [PATCH 07/26] Feature: daemons: Add functions for checking a request + for a sync point. + +--- + daemons/attrd/Makefile.am | 1 + + daemons/attrd/attrd_sync.c | 38 +++++++++++++++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 3 +++ + 3 files changed, 42 insertions(+) + create mode 100644 daemons/attrd/attrd_sync.c + +diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am +index 1a3d360..6bb81c4 100644 +--- a/daemons/attrd/Makefile.am ++++ b/daemons/attrd/Makefile.am +@@ -32,6 +32,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \ + attrd_elections.c \ + attrd_ipc.c \ + attrd_messages.c \ ++ attrd_sync.c \ + attrd_utils.c \ + pacemaker-attrd.c + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +new file mode 100644 +index 0000000..92759d2 +--- /dev/null ++++ b/daemons/attrd/attrd_sync.c +@@ -0,0 +1,38 @@ ++/* ++ * Copyright 2022 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++ ++#include "pacemaker-attrd.h" ++ ++const char * ++attrd_request_sync_point(xmlNode *xml) ++{ ++ if (xml_has_children(xml)) { ++ xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); ++ ++ if (child) { ++ return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); ++ } else { ++ return NULL; ++ } ++ ++ } else { ++ return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); ++ } ++} ++ ++bool ++attrd_request_has_sync_point(xmlNode *xml) ++{ ++ return attrd_request_sync_point(xml) != NULL; ++} +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 71ce90a..ff850bb 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -182,4 +182,7 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a + void attrd_unregister_handlers(void); + void attrd_handle_request(pcmk__request_t *request); + ++const char *attrd_request_sync_point(xmlNode *xml); ++bool attrd_request_has_sync_point(xmlNode *xml); ++ + #endif /* PACEMAKER_ATTRD__H */ +-- +2.31.1 + +From 2e0509a12ee7d4a612133ee65b75245eea7d271d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:42:04 -0400 +Subject: [PATCH 08/26] Refactor: daemons: Don't ACK update requests that give + a sync point. + +The ACK is the only response from the server for update messages. If +the message specified that it wanted to wait for a sync point, we need +to delay sending that response until the sync point is reached. +Therefore, do not always immediately send the ACK. 
+--- + daemons/attrd/attrd_messages.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index de4a28a..9e8ae40 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -137,12 +137,21 @@ handle_update_request(pcmk__request_t *request) + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; ++ + } else { +- /* Because attrd_client_update can be called recursively, we send the ACK +- * here to ensure that the client only ever receives one. +- */ +- attrd_send_ack(request->ipc_client, request->ipc_id, +- request->flags|crm_ipc_client_response); ++ if (!attrd_request_has_sync_point(request->xml)) { ++ /* If the client doesn't want to wait for a sync point, go ahead and send ++ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate ++ * sync point is reached. ++ * ++ * In the normal case, attrd_client_update can be called recursively which ++ * makes where to send the ACK tricky. Doing it here ensures the client ++ * only ever receives one. ++ */ ++ attrd_send_ack(request->ipc_client, request->ipc_id, ++ request->flags|crm_ipc_client_response); ++ } ++ + return attrd_client_update(request); + } + } +-- +2.31.1 + +From 2a0ff66cdf0085c4c8ab1992ef7e785a4facc8c7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:48:48 -0400 +Subject: [PATCH 09/26] Feature: daemons: Add support for local sync points on + updates. + +In the IPC dispatcher for attrd, add the client to a wait list if its +request specifies a sync point. When the attribute's value is changed +on the local attrd, alert any clients waiting on a local sync point by +then sending the previously delayed ACK. + +Sync points for other requests and the global sync point are not yet +supported. + +Fixes T35. 
+--- + daemons/attrd/attrd_corosync.c | 18 +++++ + daemons/attrd/attrd_messages.c | 12 ++- + daemons/attrd/attrd_sync.c | 137 ++++++++++++++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 7 ++ + 4 files changed, 173 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 539e5bf..4337280 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -568,14 +568,32 @@ void + attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter) + { ++ bool handle_sync_point = false; ++ + if (xml_has_children(xml)) { + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + copy_attrs(xml, child); + attrd_peer_update_one(peer, child, filter); ++ ++ if (attrd_request_has_sync_point(child)) { ++ handle_sync_point = true; ++ } + } + + } else { + attrd_peer_update_one(peer, xml, filter); ++ ++ if (attrd_request_has_sync_point(xml)) { ++ handle_sync_point = true; ++ } ++ } ++ ++ /* If the update XML specified that the client wanted to wait for a sync ++ * point, process that now. ++ */ ++ if (handle_sync_point) { ++ crm_debug("Hit local sync point for attribute update"); ++ attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 9e8ae40..c96700f 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -139,7 +139,17 @@ handle_update_request(pcmk__request_t *request) + return NULL; + + } else { +- if (!attrd_request_has_sync_point(request->xml)) { ++ if (attrd_request_has_sync_point(request->xml)) { ++ /* If this client supplied a sync point it wants to wait for, add it to ++ * the wait list. Clients on this list will not receive an ACK until ++ * their sync point is hit which will result in the client stalled there ++ * until it receives a response. 
++ * ++ * All other clients will receive the expected response as normal. ++ */ ++ attrd_add_client_to_waitlist(request); ++ ++ } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 92759d2..2981bd0 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -14,6 +14,143 @@ + + #include "pacemaker-attrd.h" + ++/* A hash table storing clients that are waiting on a sync point to be reached. ++ * The key is waitlist_client - just a plain int. The obvious key would be ++ * the IPC client's ID, but this is not guaranteed to be unique. A single client ++ * could be waiting on a sync point for multiple attributes at the same time. ++ * ++ * It is not expected that this hash table will ever be especially large. ++ */ ++static GHashTable *waitlist = NULL; ++static int waitlist_client = 0; ++ ++struct waitlist_node { ++ /* What kind of sync point does this node describe? */ ++ enum attrd_sync_point sync_point; ++ ++ /* Information required to construct and send a reply to the client. 
*/ ++ char *client_id; ++ uint32_t ipc_id; ++ uint32_t flags; ++}; ++ ++static void ++next_key(void) ++{ ++ do { ++ waitlist_client++; ++ if (waitlist_client < 0) { ++ waitlist_client = 1; ++ } ++ } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); ++} ++ ++static void ++free_waitlist_node(gpointer data) ++{ ++ struct waitlist_node *wl = (struct waitlist_node *) data; ++ ++ free(wl->client_id); ++ free(wl); ++} ++ ++static const char * ++sync_point_str(enum attrd_sync_point sync_point) ++{ ++ if (sync_point == attrd_sync_point_local) { ++ return PCMK__VALUE_LOCAL; ++ } else if (sync_point == attrd_sync_point_cluster) { ++ return PCMK__VALUE_CLUSTER; ++ } else { ++ return "unknown"; ++ } ++} ++ ++void ++attrd_add_client_to_waitlist(pcmk__request_t *request) ++{ ++ const char *sync_point = attrd_request_sync_point(request->xml); ++ struct waitlist_node *wl = NULL; ++ ++ if (sync_point == NULL) { ++ return; ++ } ++ ++ if (waitlist == NULL) { ++ waitlist = pcmk__intkey_table(free_waitlist_node); ++ } ++ ++ wl = calloc(sizeof(struct waitlist_node), 1); ++ ++ CRM_ASSERT(wl != NULL); ++ ++ wl->client_id = strdup(request->ipc_client->id); ++ ++ CRM_ASSERT(wl->client_id); ++ ++ if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { ++ wl->sync_point = attrd_sync_point_local; ++ } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ wl->sync_point = attrd_sync_point_cluster; ++ } else { ++ free_waitlist_node(wl); ++ return; ++ } ++ ++ wl->ipc_id = request->ipc_id; ++ wl->flags = request->flags; ++ ++ crm_debug("Added client %s to waitlist for %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ ++ next_key(); ++ pcmk__intkey_table_insert(waitlist, waitlist_client, wl); ++ ++ /* And then add the key to the request XML so we can uniquely identify ++ * it when it comes time to issue the ACK. 
++ */ ++ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); ++} ++ ++void ++attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) ++{ ++ int callid; ++ gpointer value; ++ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ crm_warn("Could not get callid from request XML"); ++ return; ++ } ++ ++ value = pcmk__intkey_table_lookup(waitlist, callid); ++ if (value != NULL) { ++ struct waitlist_node *wl = (struct waitlist_node *) value; ++ pcmk__client_t *client = NULL; ++ ++ if (wl->sync_point != sync_point) { ++ return; ++ } ++ ++ crm_debug("Alerting client %s for reached %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ ++ client = pcmk__find_client_by_id(wl->client_id); ++ if (client == NULL) { ++ return; ++ } ++ ++ attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); ++ ++ /* And then remove the client so it doesn't get alerted again. */ ++ pcmk__intkey_table_remove(waitlist, callid); ++ } ++} ++ + const char * + attrd_request_sync_point(xmlNode *xml) + { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index ff850bb..9dd8320 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -182,6 +182,13 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a + void attrd_unregister_handlers(void); + void attrd_handle_request(pcmk__request_t *request); + ++enum attrd_sync_point { ++ attrd_sync_point_local, ++ attrd_sync_point_cluster, ++}; ++ ++void attrd_add_client_to_waitlist(pcmk__request_t *request); ++void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); + +-- +2.31.1 + +From 59caaf1682191a91d6062358b770f8b9457ba3eb Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 
14:56:58 -0400 +Subject: [PATCH 10/26] Feature: daemons: If a client disconnects, remove it + from the waitlist. + +--- + daemons/attrd/attrd_ipc.c | 5 +++++ + daemons/attrd/attrd_sync.c | 21 +++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 1 + + 3 files changed, 27 insertions(+) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 7e4a1c0..8aa39c2 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -438,8 +438,13 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) + crm_trace("Ignoring request to clean up unknown connection %p", c); + } else { + crm_trace("Cleaning up closed client connection %p", c); ++ ++ /* Remove the client from the sync point waitlist if it's present. */ ++ attrd_remove_client_from_waitlist(client); ++ + pcmk__free_client(client); + } ++ + return FALSE; + } + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 2981bd0..7293318 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -112,6 +112,27 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++void ++attrd_remove_client_from_waitlist(pcmk__client_t *client) ++{ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, waitlist); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ struct waitlist_node *wl = (struct waitlist_node *) value; ++ ++ if (wl->client_id == client->id) { ++ g_hash_table_iter_remove(&iter); ++ } ++ } ++} ++ + void + attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 9dd8320..b6ecb75 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -189,6 +189,7 @@ enum attrd_sync_point { + + void attrd_add_client_to_waitlist(pcmk__request_t *request); + void 
attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); ++void attrd_remove_client_from_waitlist(pcmk__client_t *client); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); + +-- +2.31.1 + +From b28042e1d64b48c96dbd9da1e9ee3ff481bbf620 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 11:00:20 -0400 +Subject: [PATCH 11/26] Feature: daemons: Add support for local sync points on + clearing failures. + +attrd_clear_client_failure just calls attrd_client_update underneath, so +that function will handle all the rest of the sync point functionality +for us. +--- + daemons/attrd/attrd_ipc.c | 2 -- + daemons/attrd/attrd_messages.c | 19 +++++++++++++++++++ + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 8aa39c2..2e614e8 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -101,8 +101,6 @@ attrd_client_clear_failure(pcmk__request_t *request) + xmlNode *xml = request->xml; + const char *rsc, *op, *interval_spec; + +- attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); +- + if (minimum_protocol_version >= 2) { + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index c96700f..3ba14a6 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -42,6 +42,25 @@ handle_clear_failure_request(pcmk__request_t *request) + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { ++ if (attrd_request_has_sync_point(request->xml)) { ++ /* If this client supplied a sync point it wants to wait for, add it to ++ * the wait list. Clients on this list will not receive an ACK until ++ * their sync point is hit which will result in the client stalled there ++ * until it receives a response. 
++ * ++ * All other clients will receive the expected response as normal. ++ */ ++ attrd_add_client_to_waitlist(request); ++ ++ } else { ++ /* If the client doesn't want to wait for a sync point, go ahead and send ++ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate ++ * sync point is reached. ++ */ ++ attrd_send_ack(request->ipc_client, request->ipc_id, ++ request->ipc_flags); ++ } ++ + return attrd_client_clear_failure(request); + } + } +-- +2.31.1 + +From 291dc3b91e57f2584bbf88cfbe3a360e0332e814 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 13:17:24 -0400 +Subject: [PATCH 12/26] Refactor: daemons: Free the waitlist on attrd exit. + +--- + daemons/attrd/attrd_sync.c | 11 +++++++++++ + daemons/attrd/attrd_utils.c | 2 ++ + daemons/attrd/pacemaker-attrd.c | 1 + + daemons/attrd/pacemaker-attrd.h | 1 + + 4 files changed, 15 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 7293318..557e49a 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -112,6 +112,17 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++void ++attrd_free_waitlist(void) ++{ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ g_hash_table_destroy(waitlist); ++ waitlist = NULL; ++} ++ + void + attrd_remove_client_from_waitlist(pcmk__client_t *client) + { +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 6a19009..00b879b 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -93,6 +93,8 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGUSR2); + mainloop_destroy_signal(SIGTRAP); + ++ attrd_free_waitlist(); ++ + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible + * only if we get SIGTERM in brief windows at start-up and shutdown. 
+diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c +index 2100db4..1336542 100644 +--- a/daemons/attrd/pacemaker-attrd.c ++++ b/daemons/attrd/pacemaker-attrd.c +@@ -300,6 +300,7 @@ main(int argc, char **argv) + attrd_ipc_fini(); + attrd_lrmd_disconnect(); + attrd_cib_disconnect(); ++ attrd_free_waitlist(); + g_hash_table_destroy(attributes); + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index b6ecb75..537bf85 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -52,6 +52,7 @@ void attrd_run_mainloop(void); + + void attrd_set_requesting_shutdown(void); + void attrd_clear_requesting_shutdown(void); ++void attrd_free_waitlist(void); + bool attrd_requesting_shutdown(void); + bool attrd_shutting_down(void); + void attrd_shutdown(int nsig); +-- +2.31.1 + +From 7715ce617c520e14687a82e11ff794c93cd7f64a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 13:21:16 -0400 +Subject: [PATCH 13/26] Feature: includes: Bump CRM_FEATURE_SET for local sync + points. + +--- + include/crm/crm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/crm/crm.h b/include/crm/crm.h +index 5710e4b..7c5c602 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -66,7 +66,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.16.1" ++# define CRM_FEATURE_SET "3.16.2" + + /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and + * recipient of a CPG message. This imposes an arbitrary limit on cluster node +-- +2.31.1 + +From b9054425a76d03f538cd0b3ae27490b1874eee8a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 28 Oct 2022 14:23:49 -0400 +Subject: [PATCH 14/26] Refactor: daemons: Add comments for previously added + sync point code. 
+ +--- + daemons/attrd/attrd_sync.c | 63 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 63 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 557e49a..e9690b5 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -66,6 +66,20 @@ sync_point_str(enum attrd_sync_point sync_point) + } + } + ++/*! ++ * \internal ++ * \brief Add a client to the attrd waitlist ++ * ++ * Typically, a client receives an ACK for its XML IPC request immediately. However, ++ * some clients want to wait until their request has been processed and taken effect. ++ * This is called a sync point. Any client placed on this waitlist will have its ++ * ACK message delayed until either its requested sync point is hit, or until it ++ * times out. ++ * ++ * The XML IPC request must specify the type of sync point it wants to wait for. ++ * ++ * \param[in,out] request The request describing the client to place on the waitlist. ++ */ + void + attrd_add_client_to_waitlist(pcmk__request_t *request) + { +@@ -112,6 +126,11 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++/*! ++ * \internal ++ * \brief Free all memory associated with the waitlist. This is most typically ++ * used when attrd shuts down. ++ */ + void + attrd_free_waitlist(void) + { +@@ -123,6 +142,13 @@ attrd_free_waitlist(void) + waitlist = NULL; + } + ++/*! ++ * \internal ++ * \brief Unconditionally remove a client from the waitlist, such as when the client ++ * node disconnects from the cluster ++ * ++ * \param[in] client The client to remove ++ */ + void + attrd_remove_client_from_waitlist(pcmk__client_t *client) + { +@@ -144,6 +170,18 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + } + } + ++/*! 
++ * \internal ++ * \brief Send an IPC ACK message to all awaiting clients ++ * ++ * This function will search the waitlist for all clients that are currently awaiting ++ * an ACK indicating their attrd operation is complete. Only those clients with a ++ * matching sync point type and callid from their original XML IPC request will be ++ * ACKed. Once they have received an ACK, they will be removed from the waitlist. ++ * ++ * \param[in] sync_point What kind of sync point have we hit? ++ * \param[in] xml The original XML IPC request. ++ */ + void + attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + { +@@ -183,6 +221,23 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + } + } + ++/*! ++ * \internal ++ * \brief Return the sync point attribute for an IPC request ++ * ++ * This function will check both the top-level element of \p xml for a sync ++ * point attribute, as well as all of its \p op children, if any. The latter ++ * is useful for newer versions of attrd that can put multiple IPC requests ++ * into a single message. ++ * ++ * \param[in] xml An XML IPC request ++ * ++ * \note It is assumed that if one child element has a sync point attribute, ++ * all will have a sync point attribute and they will all be the same ++ * sync point. No other configuration is supported. ++ * ++ * \return The sync point attribute of \p xml, or NULL if none. ++ */ + const char * + attrd_request_sync_point(xmlNode *xml) + { +@@ -200,6 +255,14 @@ attrd_request_sync_point(xmlNode *xml) + } + } + ++/*! ++ * \internal ++ * \brief Does an IPC request contain any sync point attribute? 
++ * ++ * \param[in] xml An XML IPC request ++ * ++ * \return true if there's a sync point attribute, false otherwise ++ */ + bool + attrd_request_has_sync_point(xmlNode *xml) + { +-- +2.31.1 + +From 64219fb7075ee58d29f94f077a3b8f94174bb32a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 12:43:05 -0400 +Subject: [PATCH 15/26] Feature: tools: Add --wait=cluster option to + attrd_updater. + +--- + tools/attrd_updater.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index c4779a6..3cd766d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -106,6 +106,10 @@ wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError ** + pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); + pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); + return TRUE; ++ } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); ++ return TRUE; + } else { + g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, + "--wait= must be one of 'no', 'local', 'cluster'"); +@@ -193,10 +197,12 @@ static GOptionEntry addl_entries[] = { + + { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, + "Wait for some event to occur before returning. Values are 'no' (wait\n" +- INDENT "only for the attribute daemon to acknowledge the request) or\n" ++ INDENT "only for the attribute daemon to acknowledge the request),\n" + INDENT "'local' (wait until the change has propagated to where a local\n" + INDENT "query will return the request value, or the value set by a\n" +- INDENT "later request). 
Default is 'no'.", ++ INDENT "later request), or 'cluster' (wait until the change has propagated\n" ++ INDENT "to where a query anywhere on the cluster will return the requested\n" ++ INDENT "value, or the value set by a later request). Default is 'no'.", + "UNTIL" }, + + { NULL } +-- +2.31.1 + +From 1bc5511fadf6ad670508bd3a2a55129bde16f774 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 16 Sep 2022 14:55:06 -0400 +Subject: [PATCH 16/26] Refactor: daemons: Add a confirm= attribute to attrd + messages. + +This allows informing the originator of a message that the message has +been received and processed. As yet, there is no mechanism for handling +and returning the confirmation, only for requesting it. +--- + daemons/attrd/attrd_corosync.c | 6 +++--- + daemons/attrd/attrd_ipc.c | 26 +++++++++++++++++++++----- + daemons/attrd/attrd_messages.c | 11 +++++++++-- + daemons/attrd/pacemaker-attrd.h | 7 ++++--- + include/crm_internal.h | 1 + + 5 files changed, 38 insertions(+), 13 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 4337280..e86ca07 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -124,7 +124,7 @@ broadcast_local_value(const attribute_t *a) + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + attrd_add_value_xml(sync, a, v, false); +- attrd_send_message(NULL, sync); ++ attrd_send_message(NULL, sync, false); + free_xml(sync); + return v; + } +@@ -387,7 +387,7 @@ broadcast_unseen_local_values(void) + + if (sync != NULL) { + crm_debug("Broadcasting local-only values"); +- attrd_send_message(NULL, sync); ++ attrd_send_message(NULL, sync, false); + free_xml(sync); + } + } +@@ -539,7 +539,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml) + } + + crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); +- attrd_send_message(peer, sync); ++ attrd_send_message(peer, sync, false); + free_xml(sync); + } + +diff --git a/daemons/attrd/attrd_ipc.c 
b/daemons/attrd/attrd_ipc.c +index 2e614e8..0fc5e93 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -105,7 +105,7 @@ attrd_client_clear_failure(pcmk__request_t *request) + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). + */ +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +@@ -184,7 +184,7 @@ attrd_client_peer_remove(pcmk__request_t *request) + if (host) { + crm_info("Client %s is requesting all values for %s be removed", + pcmk__client_name(request->ipc_client), host); +- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + free(host_alloc); + } else { + crm_info("Ignoring request by client %s to remove all peer values without specifying peer", +@@ -314,7 +314,7 @@ attrd_client_update(pcmk__request_t *request) + } + } + +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -358,7 +358,7 @@ attrd_client_update(pcmk__request_t *request) + if (status == 0) { + crm_trace("Matched %s with %s", attr, regex); + crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + } + } + +@@ -388,7 +388,23 @@ attrd_client_update(pcmk__request_t *request) + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? " (writer)" : "")); + +- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ ++ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ /* The client is waiting on the cluster-wide sync point. 
In this case, ++ * the response ACK is not sent until this attrd broadcasts the update ++ * and receives its own confirmation back from all peers. ++ */ ++ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ ++ ++ } else { ++ /* The client is either waiting on the local sync point or was not ++ * waiting on any sync point at all. For the local sync point, the ++ * response ACK is sent in attrd_peer_update. For clients not ++ * waiting on any sync point, the response ACK is sent in ++ * handle_update_request immediately before this function was called. ++ */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ ++ } ++ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 3ba14a6..78df0d0 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -279,16 +279,23 @@ attrd_broadcast_protocol(void) + crm_debug("Broadcasting attrd protocol version %s for node %s", + ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); + +- attrd_send_message(NULL, attrd_op); /* ends up at attrd_peer_message() */ ++ attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ + + free_xml(attrd_op); + } + + gboolean +-attrd_send_message(crm_node_t * node, xmlNode * data) ++attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + { + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); ++ ++ /* Request a confirmation from the destination peer node (which could ++ * be all if node is NULL) that the message has been received and ++ * acted upon. 
++ */ ++ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); + } +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 537bf85..25f7c8a 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -39,10 +39,11 @@ + * PCMK__ATTRD_CMD_UPDATE_DELAY + * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE + * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes +- * 4 2.2.0 Multiple attributes can be updated in a single IPC ++ * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message ++ * 5 2.1.5 Peers can request confirmation of a sent message + */ +-#define ATTRD_PROTOCOL_VERSION "4" ++#define ATTRD_PROTOCOL_VERSION "5" + + #define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) +@@ -162,7 +163,7 @@ xmlNode *attrd_client_clear_failure(pcmk__request_t *request); + xmlNode *attrd_client_update(pcmk__request_t *request); + xmlNode *attrd_client_refresh(pcmk__request_t *request); + xmlNode *attrd_client_query(pcmk__request_t *request); +-gboolean attrd_send_message(crm_node_t * node, xmlNode * data); ++gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); + + xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write); +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 08193c3..63a1726 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -79,6 +79,7 @@ + #define PCMK__XA_ATTR_WRITER "attr_writer" + #define PCMK__XA_CONFIG_ERRORS "config-errors" + #define PCMK__XA_CONFIG_WARNINGS "config-warnings" ++#define PCMK__XA_CONFIRM "confirm" + #define PCMK__XA_GRAPH_ERRORS "graph-errors" + #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" + #define PCMK__XA_MODE "mode" +-- +2.31.1 + +From 
6f389038fc0b11f6291c022c99f188666c65f530 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 14:44:42 -0400 +Subject: [PATCH 17/26] Feature: daemons: Respond to received attrd + confirmation requests. + +On the receiving peer side, if the XML request contains confirm="true", +construct a confirmation message after handling the request completes +and send it back to the originating peer. + +On the originating peer side, add a skeleton handler for confirmation +messages. This does nothing at the moment except log it. +--- + daemons/attrd/attrd_corosync.c | 38 ++++++++++++++++++++++++++++++++++ + daemons/attrd/attrd_messages.c | 13 ++++++++++++ + include/crm_internal.h | 1 + + 3 files changed, 52 insertions(+) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index e86ca07..1245d9c 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -25,6 +25,19 @@ + + extern crm_exit_t attrd_exit_status; + ++static xmlNode * ++attrd_confirmation(int callid) ++{ ++ xmlNode *node = create_xml_node(NULL, __func__); ++ ++ crm_xml_add(node, F_TYPE, T_ATTRD); ++ crm_xml_add(node, F_ORIG, get_local_node_name()); ++ crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); ++ crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); ++ ++ return node; ++} ++ + static void + attrd_peer_message(crm_node_t *peer, xmlNode *xml) + { +@@ -57,6 +70,31 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) + CRM_CHECK(request.op != NULL, return); + + attrd_handle_request(&request); ++ ++ /* Having finished handling the request, check to see if the originating ++ * peer requested confirmation. If so, send that confirmation back now. ++ */ ++ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { ++ int callid = 0; ++ xmlNode *reply = NULL; ++ ++ /* Add the confirmation ID for the message we are confirming to the ++ * response so the originating peer knows what they're a confirmation ++ * for. 
++ */ ++ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); ++ reply = attrd_confirmation(callid); ++ ++ /* And then send the confirmation back to the originating peer. This ++ * ends up right back in this same function (attrd_peer_message) on the ++ * peer where it will have to do something with a PCMK__XA_CONFIRM type ++ * message. ++ */ ++ crm_debug("Sending %s a confirmation", peer->uname); ++ attrd_send_message(peer, reply, false); ++ free_xml(reply); ++ } ++ + pcmk__reset_request(&request); + } + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 78df0d0..9c792b2 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -65,6 +65,18 @@ handle_clear_failure_request(pcmk__request_t *request) + } + } + ++static xmlNode * ++handle_confirm_request(pcmk__request_t *request) ++{ ++ if (request->peer != NULL) { ++ crm_debug("Received confirmation from %s", request->peer); ++ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); ++ return NULL; ++ } else { ++ return handle_unknown_request(request); ++ } ++} ++ + static xmlNode * + handle_flush_request(pcmk__request_t *request) + { +@@ -190,6 +202,7 @@ attrd_register_handlers(void) + { + pcmk__server_command_t handlers[] = { + { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, ++ { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, + { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, + { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, + { PCMK__ATTRD_CMD_QUERY, handle_query_request }, +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 63a1726..f60e7b4 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -108,6 +108,7 @@ + #define PCMK__ATTRD_CMD_SYNC "sync" + #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" + #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" ++#define PCMK__ATTRD_CMD_CONFIRM "confirm" + + #define PCMK__CONTROLD_CMD_NODES "list-nodes" + +-- +2.31.1 + +From 
dfb730e9ced9dc75886fda9452c584860573fe30 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 15:58:00 -0400 +Subject: [PATCH 18/26] Feature: daemons: Keep track of #attrd-protocol from + each peer. + +This information can be used in the future when dealing with +cluster-wide sync points to know which peers we are waiting on a reply +from. +--- + daemons/attrd/attrd_corosync.c | 3 +- + daemons/attrd/attrd_utils.c | 60 ++++++++++++++++++++++++++++++--- + daemons/attrd/pacemaker-attrd.h | 4 ++- + 3 files changed, 60 insertions(+), 7 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1245d9c..6f88ab6 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -268,6 +268,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + // Remove votes from cluster nodes that leave, in case election in progress + if (gone && !is_remote) { + attrd_remove_voter(peer); ++ attrd_remove_peer_protocol_ver(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +@@ -395,7 +396,7 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + * version, check to see if it's a new minimum version. + */ + if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { +- attrd_update_minimum_protocol_ver(value); ++ attrd_update_minimum_protocol_ver(peer->uname, value); + } + } + +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 00b879b..421faed 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -29,6 +29,11 @@ static bool requesting_shutdown = false; + static bool shutting_down = false; + static GMainLoop *mloop = NULL; + ++/* A hash table storing information on the protocol version of each peer attrd. ++ * The key is the peer's uname, and the value is the protocol version number. ++ */ ++GHashTable *peer_protocol_vers = NULL; ++ + /*! 
+ * \internal + * \brief Set requesting_shutdown state +@@ -94,6 +99,10 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); ++ if (peer_protocol_vers != NULL) { ++ g_hash_table_destroy(peer_protocol_vers); ++ peer_protocol_vers = NULL; ++ } + + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible +@@ -273,16 +282,57 @@ attrd_free_attribute(gpointer data) + } + } + ++/*! ++ * \internal ++ * \brief When a peer node leaves the cluster, stop tracking its protocol version. ++ * ++ * \param[in] host The peer node's uname to be removed ++ */ ++void ++attrd_remove_peer_protocol_ver(const char *host) ++{ ++ if (peer_protocol_vers != NULL) { ++ g_hash_table_remove(peer_protocol_vers, host); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief When a peer node broadcasts a message with its protocol version, keep ++ * track of that information. ++ * ++ * We keep track of each peer's protocol version so we know which peers to ++ * expect confirmation messages from when handling cluster-wide sync points. ++ * We additionally keep track of the lowest protocol version supported by all ++ * peers so we know when we can send IPC messages containing more than one ++ * request. 
++ * ++ * \param[in] host The peer node's uname to be tracked ++ * \param[in] value The peer node's protocol version ++ */ + void +-attrd_update_minimum_protocol_ver(const char *value) ++attrd_update_minimum_protocol_ver(const char *host, const char *value) + { + int ver; + ++ if (peer_protocol_vers == NULL) { ++ peer_protocol_vers = pcmk__strkey_table(free, NULL); ++ } ++ + pcmk__scan_min_int(value, &ver, 0); + +- if (ver > 0 && (minimum_protocol_version == -1 || ver < minimum_protocol_version)) { +- minimum_protocol_version = ver; +- crm_trace("Set minimum attrd protocol version to %d", +- minimum_protocol_version); ++ if (ver > 0) { ++ char *host_name = strdup(host); ++ ++ /* Record the peer attrd's protocol version. */ ++ CRM_ASSERT(host_name != NULL); ++ g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); ++ ++ /* If the protocol version is a new minimum, record it as such. */ ++ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { ++ minimum_protocol_version = ver; ++ crm_trace("Set minimum attrd protocol version to %d", ++ minimum_protocol_version); ++ } + } + } +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 25f7c8a..302ef63 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -145,6 +145,7 @@ typedef struct attribute_value_s { + + extern crm_cluster_t *attrd_cluster; + extern GHashTable *attributes; ++extern GHashTable *peer_protocol_vers; + + #define CIB_OP_TIMEOUT_S 120 + +@@ -177,7 +178,8 @@ void attrd_write_attributes(bool all, bool ignore_delay); + void attrd_write_or_elect_attribute(attribute_t *a); + + extern int minimum_protocol_version; +-void attrd_update_minimum_protocol_ver(const char *value); ++void attrd_remove_peer_protocol_ver(const char *host); ++void attrd_update_minimum_protocol_ver(const char *host, const char *value); + + mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); + +-- +2.31.1 + 
+From 945f0fe51d3bf69c2cb1258b394f2f11b8996525 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 27 Oct 2022 14:42:59 -0400 +Subject: [PATCH 19/26] Feature: daemons: Handle cluster-wide sync points in + attrd. + +When an attrd receives an IPC request to update some value, record the +protocol versions of all peer attrds. Additionally register a function +that will be called when all confirmations are received. + +The originating IPC client (attrd_updater for instance) will sit there +waiting for an ACK until its timeout is hit. + +As each confirmation message comes back to attrd, mark it off the list +of peers we are waiting on. When no more peers are expected, call the +previously registered function. + +For attribute updates, this function just sends an ack back to +attrd_updater. + +Fixes T35 +--- + daemons/attrd/attrd_corosync.c | 1 + + daemons/attrd/attrd_ipc.c | 4 + + daemons/attrd/attrd_messages.c | 10 ++ + daemons/attrd/attrd_sync.c | 260 +++++++++++++++++++++++++++++++- + daemons/attrd/attrd_utils.c | 2 + + daemons/attrd/pacemaker-attrd.h | 8 + + 6 files changed, 281 insertions(+), 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 6f88ab6..37701aa 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -269,6 +269,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + if (gone && !is_remote) { + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); ++ attrd_do_not_expect_from_peer(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 0fc5e93..c70aa1b 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -393,6 +393,7 @@ attrd_client_update(pcmk__request_t *request) + * the response ACK is not sent until this attrd broadcasts the update + * and receives its
own confirmation back from all peers. + */ ++ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); + attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ + + } else { +@@ -456,6 +457,9 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) + /* Remove the client from the sync point waitlist if it's present. */ + attrd_remove_client_from_waitlist(client); + ++ /* And no longer wait for confirmations from any peers. */ ++ attrd_do_not_wait_for_client(client); ++ + pcmk__free_client(client); + } + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 9c792b2..f7b9c7c 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -69,7 +69,17 @@ static xmlNode * + handle_confirm_request(pcmk__request_t *request) + { + if (request->peer != NULL) { ++ int callid; ++ + crm_debug("Received confirmation from %s", request->peer); ++ ++ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, ++ "Could not get callid from XML"); ++ } else { ++ attrd_handle_confirmation(callid, request->peer); ++ } ++ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index e9690b5..d3d7108 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -34,6 +34,51 @@ struct waitlist_node { + uint32_t flags; + }; + ++/* A hash table storing information on in-progress IPC requests that are awaiting ++ * confirmations. These requests are currently being processed by peer attrds and ++ * we are waiting to receive confirmation messages from each peer indicating that ++ * processing is complete. ++ * ++ * Multiple requests could be waiting on confirmations at the same time. ++ * ++ * The key is the unique callid for the IPC request, and the value is a ++ * confirmation_action struct. 
++ */ ++static GHashTable *expected_confirmations = NULL; ++ ++/*! ++ * \internal ++ * \brief A structure describing a single IPC request that is awaiting confirmations ++ */ ++struct confirmation_action { ++ /*! ++ * \brief A list of peer attrds that we are waiting to receive confirmation ++ * messages from ++ * ++ * This list is dynamic - as confirmations arrive from peer attrds, they will ++ * be removed from this list. When the list is empty, all peers have processed ++ * the request and the associated confirmation action will be taken. ++ */ ++ GList *respondents; ++ ++ /*! ++ * \brief A function to run when all confirmations have been received ++ */ ++ attrd_confirmation_action_fn fn; ++ ++ /*! ++ * \brief Information required to construct and send a reply to the client ++ */ ++ char *client_id; ++ uint32_t ipc_id; ++ uint32_t flags; ++ ++ /*! ++ * \brief The XML request containing the callid associated with this action ++ */ ++ void *xml; ++}; ++ + static void + next_key(void) + { +@@ -114,12 +159,13 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + wl->ipc_id = request->ipc_id; + wl->flags = request->flags; + +- crm_debug("Added client %s to waitlist for %s sync point", +- wl->client_id, sync_point_str(wl->sync_point)); +- + next_key(); + pcmk__intkey_table_insert(waitlist, waitlist_client, wl); + ++ crm_trace("Added client %s to waitlist for %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); ++ + /* And then add the key to the request XML so we can uniquely identify + * it when it comes time to issue the ACK. 
+ */ +@@ -166,6 +212,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + + if (wl->client_id == client->id) { + g_hash_table_iter_remove(&iter); ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } + } +@@ -206,7 +253,7 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + return; + } + +- crm_debug("Alerting client %s for reached %s sync point", ++ crm_trace("Alerting client %s for reached %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + + client = pcmk__find_client_by_id(wl->client_id); +@@ -218,9 +265,28 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + + /* And then remove the client so it doesn't get alerted again. */ + pcmk__intkey_table_remove(waitlist, callid); ++ ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } + ++/*! ++ * \internal ++ * \brief Action to take when a cluster sync point is hit for a ++ * PCMK__ATTRD_CMD_UPDATE* message. ++ * ++ * \param[in] xml The request that should be passed along to ++ * attrd_ack_waitlist_clients. This should be the original ++ * IPC request containing the callid for this update message. ++ */ ++int ++attrd_cluster_sync_point_update(xmlNode *xml) ++{ ++ crm_trace("Hit cluster sync point for attribute update"); ++ attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); ++ return pcmk_rc_ok; ++} ++ + /*! + * \internal + * \brief Return the sync point attribute for an IPC request +@@ -268,3 +334,189 @@ attrd_request_has_sync_point(xmlNode *xml) + { + return attrd_request_sync_point(xml) != NULL; + } ++ ++static void ++free_action(gpointer data) ++{ ++ struct confirmation_action *action = (struct confirmation_action *) data; ++ g_list_free_full(action->respondents, free); ++ free_xml(action->xml); ++ free(action->client_id); ++ free(action); ++} ++ ++/*! 
++ * \internal ++ * \brief When a peer disconnects from the cluster, no longer wait for its confirmation ++ * for any IPC action. If this peer is the last one being waited on, this will ++ * trigger the confirmation action. ++ * ++ * \param[in] host The disconnecting peer attrd's uname ++ */ ++void ++attrd_do_not_expect_from_peer(const char *host) ++{ ++ GList *keys = g_hash_table_get_keys(expected_confirmations); ++ ++ crm_trace("Removing peer %s from expected confirmations", host); ++ ++ for (GList *node = keys; node != NULL; node = node->next) { ++ int callid = *(int *) node->data; ++ attrd_handle_confirmation(callid, host); ++ } ++ ++ g_list_free(keys); ++} ++ ++/*! ++ * \internal ++ * \brief When a client disconnects from the cluster, no longer wait on confirmations ++ * for it. Because the peer attrds may still be processing the original IPC ++ * message, they may still send us confirmations. However, we will take no ++ * action on them. ++ * ++ * \param[in] client The disconnecting client ++ */ ++void ++attrd_do_not_wait_for_client(pcmk__client_t *client) ++{ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, expected_confirmations); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ struct confirmation_action *action = (struct confirmation_action *) value; ++ ++ if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { ++ crm_trace("Removing client %s from expected confirmations", client->id); ++ g_hash_table_iter_remove(&iter); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ break; ++ } ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Register some action to be taken when IPC request confirmations are ++ * received ++ * ++ * When this function is called, a list of all peer attrds that support confirming ++ * requests is generated. 
As confirmations from these peer attrds are received, ++ * they are removed from this list. When the list is empty, the registered action ++ * will be called. ++ * ++ * \note This function should always be called before attrd_send_message is called ++ * to broadcast to the peers to ensure that we know what replies we are ++ * waiting on. Otherwise, it is possible the peer could finish and confirm ++ * before we know to expect it. ++ * ++ * \param[in] request The request that is awaiting confirmations ++ * \param[in] fn A function to be run after all confirmations are received ++ */ ++void ++attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) ++{ ++ struct confirmation_action *action = NULL; ++ GHashTableIter iter; ++ gpointer host, ver; ++ GList *respondents = NULL; ++ int callid; ++ ++ if (expected_confirmations == NULL) { ++ expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); ++ } ++ ++ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ crm_err("Could not get callid from xml"); ++ return; ++ } ++ ++ if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { ++ crm_err("Already waiting on confirmations for call id %d", callid); ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, peer_protocol_vers); ++ while (g_hash_table_iter_next(&iter, &host, &ver)) { ++ if (GPOINTER_TO_INT(ver) >= 5) { ++ char *s = strdup((char *) host); ++ ++ CRM_ASSERT(s != NULL); ++ respondents = g_list_prepend(respondents, s); ++ } ++ } ++ ++ action = calloc(1, sizeof(struct confirmation_action)); ++ CRM_ASSERT(action != NULL); ++ ++ action->respondents = respondents; ++ action->fn = fn; ++ action->xml = copy_xml(request->xml); ++ ++ action->client_id = strdup(request->ipc_client->id); ++ CRM_ASSERT(action->client_id != NULL); ++ ++ action->ipc_id = request->ipc_id; ++ action->flags = request->flags; ++ ++ pcmk__intkey_table_insert(expected_confirmations, callid, action); ++ crm_trace("Callid %d 
now waiting on %d confirmations", callid, g_list_length(respondents)); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++} ++ ++void ++attrd_free_confirmations(void) ++{ ++ if (expected_confirmations != NULL) { ++ g_hash_table_destroy(expected_confirmations); ++ expected_confirmations = NULL; ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Process a confirmation message from a peer attrd ++ * ++ * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is ++ * received from a peer attrd. If this is the last confirmation we are waiting ++ * on for a given operation, the registered action will be called. ++ * ++ * \param[in] callid The unique callid for the XML IPC request ++ * \param[in] host The confirming peer attrd's uname ++ */ ++void ++attrd_handle_confirmation(int callid, const char *host) ++{ ++ struct confirmation_action *action = NULL; ++ GList *node = NULL; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ action = pcmk__intkey_table_lookup(expected_confirmations, callid); ++ if (action == NULL) { ++ return; ++ } ++ ++ node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); ++ ++ if (node == NULL) { ++ return; ++ } ++ ++ action->respondents = g_list_remove(action->respondents, node->data); ++ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); ++ ++ if (action->respondents == NULL) { ++ action->fn(action->xml); ++ pcmk__intkey_table_remove(expected_confirmations, callid); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ } ++} +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 421faed..f3a2059 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -99,6 +99,8 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); ++ attrd_free_confirmations(); ++ + if 
(peer_protocol_vers != NULL) { + g_hash_table_destroy(peer_protocol_vers); + peer_protocol_vers = NULL; +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 302ef63..bcc329d 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -191,8 +191,16 @@ enum attrd_sync_point { + attrd_sync_point_cluster, + }; + ++typedef int (*attrd_confirmation_action_fn)(xmlNode *); ++ + void attrd_add_client_to_waitlist(pcmk__request_t *request); + void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); ++int attrd_cluster_sync_point_update(xmlNode *xml); ++void attrd_do_not_expect_from_peer(const char *host); ++void attrd_do_not_wait_for_client(pcmk__client_t *client); ++void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); ++void attrd_free_confirmations(void); ++void attrd_handle_confirmation(int callid, const char *host); + void attrd_remove_client_from_waitlist(pcmk__client_t *client); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); +-- +2.31.1 + +From 07a032a7eb2f03dce18a7c94c56b8c837dedda15 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 28 Oct 2022 14:54:15 -0400 +Subject: [PATCH 20/26] Refactor: daemons: Add some attrd version checking + macros. + +These are just to make it a little more obvious what is actually being +asked in the code, instead of having magic numbers sprinkled around. +--- + daemons/attrd/attrd_ipc.c | 2 +- + daemons/attrd/attrd_sync.c | 2 +- + daemons/attrd/pacemaker-attrd.h | 3 +++ + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index c70aa1b..16bfff4 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -294,7 +294,7 @@ attrd_client_update(pcmk__request_t *request) + * two ways we can handle that. 
+ */ + if (xml_has_children(xml)) { +- if (minimum_protocol_version >= 4) { ++ if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { + /* First, if all peers support a certain protocol version, we can + * just broadcast the big message and they'll handle it. However, + * we also need to apply all the transformations in this function +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index d3d7108..e48f82e 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -444,7 +444,7 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f + + g_hash_table_iter_init(&iter, peer_protocol_vers); + while (g_hash_table_iter_next(&iter, &host, &ver)) { +- if (GPOINTER_TO_INT(ver) >= 5) { ++ if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { + char *s = strdup((char *) host); + + CRM_ASSERT(s != NULL); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index bcc329d..83d7c6b 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -45,6 +45,9 @@ + */ + #define ATTRD_PROTOCOL_VERSION "5" + ++#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) ++#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) ++ + #define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) + +-- +2.31.1 + +From 811361b96c6f26a1f5eccc54b6e8bf6e6fd003be Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 31 Oct 2022 12:53:22 -0400 +Subject: [PATCH 21/26] Low: attrd: Fix removing clients from the waitlist when + they disconnect. + +The client ID is a string, so it must be compared like a string. 
+--- + daemons/attrd/attrd_sync.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index e48f82e..c9b4784 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -210,7 +210,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct waitlist_node *wl = (struct waitlist_node *) value; + +- if (wl->client_id == client->id) { ++ if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { + g_hash_table_iter_remove(&iter); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } +-- +2.31.1 + +From 4e933ad14456af85c60701410c3b23b4eab03f86 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 1 Nov 2022 12:35:12 -0400 +Subject: [PATCH 22/26] Feature: daemons: Handle an attrd client timing out. + +If the update confirmations do not come back in time, use a main loop +timer to remove the client from the table. +--- + daemons/attrd/attrd_sync.c | 49 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 49 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index c9b4784..9d07796 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -61,6 +61,12 @@ struct confirmation_action { + */ + GList *respondents; + ++ /*! ++ * \brief A timer that will be used to remove the client should it time out ++ * before receiving all confirmations ++ */ ++ mainloop_timer_t *timer; ++ + /*! 
+ * \brief A function to run when all confirmations have been received + */ +@@ -340,11 +346,51 @@ free_action(gpointer data) + { + struct confirmation_action *action = (struct confirmation_action *) data; + g_list_free_full(action->respondents, free); ++ mainloop_timer_del(action->timer); + free_xml(action->xml); + free(action->client_id); + free(action); + } + ++/* Remove an IPC request from the expected_confirmations table if the peer attrds ++ * don't respond before the timeout is hit. We set the timeout to 15s. The exact ++ * number isn't critical - we just want to make sure that the table eventually gets ++ * cleared of things that didn't complete. ++ */ ++static gboolean ++confirmation_timeout_cb(gpointer data) ++{ ++ struct confirmation_action *action = (struct confirmation_action *) data; ++ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (expected_confirmations == NULL) { ++ return G_SOURCE_REMOVE; ++ } ++ ++ g_hash_table_iter_init(&iter, expected_confirmations); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ if (value == action) { ++ pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); ++ if (client == NULL) { ++ return G_SOURCE_REMOVE; ++ } ++ ++ crm_trace("Timed out waiting for confirmations for client %s", client->id); ++ pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, ++ "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); ++ ++ g_hash_table_iter_remove(&iter); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ break; ++ } ++ } ++ ++ return G_SOURCE_REMOVE; ++} ++ + /*! 
+ * \internal + * \brief When a peer disconnects from the cluster, no longer wait for its confirmation +@@ -465,6 +511,9 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f + action->ipc_id = request->ipc_id; + action->flags = request->flags; + ++ action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); ++ mainloop_timer_start(action->timer); ++ + pcmk__intkey_table_insert(expected_confirmations, callid, action); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); +-- +2.31.1 + +From 101896383cbe0103c98078e46540c076af08f040 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 2 Nov 2022 14:40:30 -0400 +Subject: [PATCH 23/26] Refactor: Demote a sync point related message to trace. + +--- + daemons/attrd/attrd_corosync.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 37701aa..5cbed7e 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -633,7 +633,7 @@ attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + * point, process that now. + */ + if (handle_sync_point) { +- crm_debug("Hit local sync point for attribute update"); ++ crm_trace("Hit local sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } + } +-- +2.31.1 + +From acd13246d4c2bef7982ca103e34896efcad22348 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 3 Nov 2022 10:29:20 -0400 +Subject: [PATCH 24/26] Low: daemons: Avoid infinite confirm loops in attrd. + +On the sending side, do not add confirm="yes" to a message with +op="confirm". On the receiving side, do not confirm a message with +op="confirm" even if confirm="yes" is set. 
+--- + daemons/attrd/attrd_corosync.c | 3 ++- + daemons/attrd/attrd_messages.c | 6 +++++- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 5cbed7e..88c1ecc 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -74,7 +74,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) + /* Having finished handling the request, check to see if the originating + * peer requested confirmation. If so, send that confirmation back now. + */ +- if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { ++ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && ++ !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + int callid = 0; + xmlNode *reply = NULL; + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index f7b9c7c..184176a 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -310,6 +310,8 @@ attrd_broadcast_protocol(void) + gboolean + attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + { ++ const char *op = crm_element_value(data, PCMK__XA_TASK); ++ + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + +@@ -317,7 +319,9 @@ attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + * be all if node is NULL) that the message has been received and + * acted upon. + */ +- pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { ++ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ } + + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); +-- +2.31.1 + +From 115e6c3a0d8db4df3eccf6da1c344168799f890d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 15 Nov 2022 09:35:28 -0500 +Subject: [PATCH 25/26] Fix: daemons: Check for NULL in + attrd_do_not_expect_from_peer. 
+ +--- + daemons/attrd/attrd_sync.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 9d07796..6936771 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -402,7 +402,13 @@ confirmation_timeout_cb(gpointer data) + void + attrd_do_not_expect_from_peer(const char *host) + { +- GList *keys = g_hash_table_get_keys(expected_confirmations); ++ GList *keys = NULL; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ keys = g_hash_table_get_keys(expected_confirmations); + + crm_trace("Removing peer %s from expected confirmations", host); + +-- +2.31.1 + +From 05da14f97ccd4f63f53801acc107ad661e5fd0c8 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 16 Nov 2022 17:37:44 -0500 +Subject: [PATCH 26/26] Low: daemons: Support cluster-wide sync points for + multi IPC messages. + +Supporting cluster-wide sync points means attrd_expect_confirmations +needs to be called, and then attrd_send_message needs "true" as a third +argument. This indicates attrd wants confirmations back from all its +peers when they have applied the update. + +We're already doing this at the end of attrd_client_update for +single-update IPC messages, and handling it for multi-update messages is +a simple matter of breaking that code out into a function and making +sure it's called. + +Note that this leaves two other spots where sync points still need to be +dealt with: + +* An update message that uses a regex. See + https://projects.clusterlabs.org/T600 for details. + +* A multi-update IPC message in a cluster where that is not supported. + See https://projects.clusterlabs.org/T601 for details. 
+--- + daemons/attrd/attrd_ipc.c | 43 ++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 19 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 16bfff4..8c5660d 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -283,6 +283,28 @@ handle_value_expansion(const char **value, xmlNode *xml, const char *op, + return pcmk_rc_ok; + } + ++static void ++send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) ++{ ++ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ /* The client is waiting on the cluster-wide sync point. In this case, ++ * the response ACK is not sent until this attrd broadcasts the update ++ * and receives its own confirmation back from all peers. ++ */ ++ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); ++ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ ++ ++ } else { ++ /* The client is either waiting on the local sync point or was not ++ * waiting on any sync point at all. For the local sync point, the ++ * response ACK is sent in attrd_peer_update. For clients not ++ * waiting on any sync point, the response ACK is sent in ++ * handle_update_request immediately before this function was called. ++ */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ ++ } ++} ++ + xmlNode * + attrd_client_update(pcmk__request_t *request) + { +@@ -314,7 +336,7 @@ attrd_client_update(pcmk__request_t *request) + } + } + +- attrd_send_message(NULL, xml, false); ++ send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -388,24 +410,7 @@ attrd_client_update(pcmk__request_t *request) + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? 
" (writer)" : "")); + +- if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { +- /* The client is waiting on the cluster-wide sync point. In this case, +- * the response ACK is not sent until this attrd broadcasts the update +- * and receives its own confirmation back from all peers. +- */ +- attrd_expect_confirmations(request, attrd_cluster_sync_point_update); +- attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ +- +- } else { +- /* The client is either waiting on the local sync point or was not +- * waiting on any sync point at all. For the local sync point, the +- * response ACK is sent in attrd_peer_update. For clients not +- * waiting on any sync point, the response ACK is sent in +- * handle_update_request immediately before this function was called. +- */ +- attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ +- } +- ++ send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +-- +2.31.1 + diff --git a/SOURCES/002-acl_group.patch b/SOURCES/002-acl_group.patch deleted file mode 100644 index 3114887..0000000 --- a/SOURCES/002-acl_group.patch +++ /dev/null @@ -1,425 +0,0 @@ -From 80c64be80f2bffdcf5d2432e1e59d633fd68d516 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:02:32 -0400 -Subject: [PATCH 1/4] Add pcmk__is_user_in_group() - ---- - lib/common/crmcommon_private.h | 3 +++ - lib/common/utils.c | 33 +++++++++++++++++++++++++++++++++ - 2 files changed, 36 insertions(+) - -diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h -index 6b7be9c68..c2fcb0adf 100644 ---- a/lib/common/crmcommon_private.h -+++ b/lib/common/crmcommon_private.h -@@ -96,6 +96,9 @@ void pcmk__free_acls(GList *acls); - G_GNUC_INTERNAL - void pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user); - -+G_GNUC_INTERNAL -+bool pcmk__is_user_in_group(const char *user, const char 
*group); -+ - G_GNUC_INTERNAL - void pcmk__apply_acl(xmlNode *xml); - -diff --git a/lib/common/utils.c b/lib/common/utils.c -index 2dfbef278..f23583acb 100644 ---- a/lib/common/utils.c -+++ b/lib/common/utils.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - - #include - -@@ -53,6 +54,38 @@ gboolean crm_config_error = FALSE; - gboolean crm_config_warning = FALSE; - char *crm_system_name = NULL; - -+bool -+pcmk__is_user_in_group(const char *user, const char *group) -+{ -+ struct group *grent; -+ char **gr_mem; -+ -+ if (user == NULL || group == NULL) { -+ return false; -+ } -+ -+ setgrent(); -+ while ((grent = getgrent()) != NULL) { -+ if (grent->gr_mem == NULL) { -+ continue; -+ } -+ -+ if(strcmp(group, grent->gr_name) != 0) { -+ continue; -+ } -+ -+ gr_mem = grent->gr_mem; -+ while (*gr_mem != NULL) { -+ if (!strcmp(user, *gr_mem++)) { -+ endgrent(); -+ return true; -+ } -+ } -+ } -+ endgrent(); -+ return false; -+} -+ - int - crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) - { --- -2.31.1 - - -From 5fbe5c310de00390fb36d866823a7745ba4812e3 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:04:57 -0400 -Subject: [PATCH 2/4] Add unit test for pcmk__is_user_in_group() - ---- - lib/common/Makefile.am | 2 +- - lib/common/mock.c | 31 +++++-- - lib/common/mock_private.h | 11 +++ - lib/common/tests/acl/Makefile.am | 11 ++- - .../tests/acl/pcmk__is_user_in_group_test.c | 92 +++++++++++++++++++ - 5 files changed, 137 insertions(+), 10 deletions(-) - create mode 100644 lib/common/tests/acl/pcmk__is_user_in_group_test.c - -diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am -index d7aae53bf..04d56dc3c 100644 ---- a/lib/common/Makefile.am -+++ b/lib/common/Makefile.am -@@ -94,7 +94,7 @@ libcrmcommon_la_SOURCES += watchdog.c - libcrmcommon_la_SOURCES += xml.c - libcrmcommon_la_SOURCES += xpath.c - --WRAPPED = calloc getenv getpwnam_r uname -+WRAPPED = calloc getenv getpwnam_r uname setgrent getgrent endgrent - 
WRAPPED_FLAGS = $(foreach fn,$(WRAPPED),-Wl,--wrap=$(fn)) - - libcrmcommon_test_la_SOURCES = $(libcrmcommon_la_SOURCES) -diff --git a/lib/common/mock.c b/lib/common/mock.c -index 55812ddbc..fa9431e6d 100644 ---- a/lib/common/mock.c -+++ b/lib/common/mock.c -@@ -11,6 +11,7 @@ - #include - #include - #include -+#include - - #include "mock_private.h" - -@@ -18,13 +19,13 @@ - * libcrmcommon_test.a, not into libcrmcommon.so. It is used to support - * constructing mock versions of library functions for unit testing. - * -- * Each unit test will only ever want to use a mocked version of one or two -- * library functions. However, we need to mark all the mocked functions as -- * wrapped (with -Wl,--wrap= in the LDFLAGS) in libcrmcommon_test.a so that -- * all those unit tests can share the same special test library. The unit -- * test then defines its own wrapped function. Because a unit test won't -- * define every single wrapped function, there will be undefined references -- * at link time. -+ * Each unit test will only ever want to use a mocked version of a few -+ * library functions (i.e. not all of them). However, we need to mark all -+ * the mocked functions as wrapped (with -Wl,--wrap= in the LDFLAGS) in -+ * libcrmcommon_test.a so that all those unit tests can share the same -+ * special test library. The unit test then defines its own wrapped -+ * function. Because a unit test won't define every single wrapped -+ * function, there will be undefined references at link time. - * - * This file takes care of those undefined references. 
It defines a - * wrapped version of every function that simply calls the real libc -@@ -74,3 +75,19 @@ int __attribute__((weak)) - __wrap_uname(struct utsname *buf) { - return __real_uname(buf); - } -+ -+void __attribute__((weak)) -+__wrap_setgrent(void) { -+ __real_setgrent(); -+} -+ -+struct group * __attribute__((weak)) -+__wrap_getgrent(void) { -+ return __real_getgrent(); -+} -+ -+void __attribute__((weak)) -+__wrap_endgrent(void) { -+ __real_endgrent(); -+} -+ -diff --git a/lib/common/mock_private.h b/lib/common/mock_private.h -index 3df7c9839..0c1134cc3 100644 ---- a/lib/common/mock_private.h -+++ b/lib/common/mock_private.h -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - - /* This header is for the sole use of libcrmcommon_test. */ - -@@ -31,4 +32,14 @@ int __wrap_getpwnam_r(const char *name, struct passwd *pwd, - int __real_uname(struct utsname *buf); - int __wrap_uname(struct utsname *buf); - -+void __real_setgrent(void); -+void __wrap_setgrent(void); -+ -+struct group *__real_getgrent(void); -+struct group *__wrap_getgrent(void); -+ -+void __real_endgrent(void); -+void __wrap_endgrent(void); -+ -+ - #endif // MOCK_PRIVATE__H -diff --git a/lib/common/tests/acl/Makefile.am b/lib/common/tests/acl/Makefile.am -index 679c9cb8e..a73fc354c 100644 ---- a/lib/common/tests/acl/Makefile.am -+++ b/lib/common/tests/acl/Makefile.am -@@ -1,19 +1,26 @@ - # --# Copyright 2021 the Pacemaker project contributors -+# Copyright 2021-2022 the Pacemaker project contributors - # - # The version control history for this file may have further details. - # - # This source code is licensed under the GNU General Public License version 2 - # or later (GPLv2+) WITHOUT ANY WARRANTY. 
- # --AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include -+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/lib/common - LDADD = $(top_builddir)/lib/common/libcrmcommon.la -lcmocka - -+pcmk__is_user_in_group_test_LDADD = $(top_builddir)/lib/common/libcrmcommon_test.la -lcmocka -+pcmk__is_user_in_group_test_LDFLAGS = \ -+ -Wl,--wrap=setgrent \ -+ -Wl,--wrap=getgrent \ -+ -Wl,--wrap=endgrent -+ - include $(top_srcdir)/mk/tap.mk - - # Add "_test" to the end of all test program names to simplify .gitignore. - - check_PROGRAMS = \ -+ pcmk__is_user_in_group_test \ - pcmk_acl_required_test \ - xml_acl_denied_test \ - xml_acl_enabled_test -diff --git a/lib/common/tests/acl/pcmk__is_user_in_group_test.c b/lib/common/tests/acl/pcmk__is_user_in_group_test.c -new file mode 100644 -index 000000000..67b8c2c7c ---- /dev/null -+++ b/lib/common/tests/acl/pcmk__is_user_in_group_test.c -@@ -0,0 +1,92 @@ -+/* -+ * Copyright 2020-2022 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. -+ * -+ * This source code is licensed under the GNU Lesser General Public License -+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. -+ */ -+ -+#include -+#include -+#include "../../crmcommon_private.h" -+ -+#include "mock_private.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+// THe index of the group that is going to be returned next from "get group entry" (getgrent) -+static int group_idx = 0; -+ -+// Data used for testing -+static const char* grp0_members[] = { -+ "user0", "user1", NULL -+}; -+ -+static const char* grp1_members[] = { -+ "user1", NULL -+}; -+ -+static const char* grp2_members[] = { -+ "user2", "user1", NULL -+}; -+ -+// an array of "groups" (a struct from grp.h), the members of the groups are initalized here to some testing data. -+// Casting away the consts to make the compiler happy and simplify initialization. 
-+// We never actually change these variables during the test! -+// string literal = const char* (cannot be changed b/c ? ) vs. char* (its getting casted to this) -+static const int NUM_GROUPS = 3; -+static struct group groups[] = { -+ {(char*)"grp0", (char*)"", 0, (char**)grp0_members}, -+ {(char*)"grp1", (char*)"", 1, (char**)grp1_members}, -+ {(char*)"grp2", (char*)"", 2, (char**)grp2_members}, -+}; -+ -+// This function resets the group_idx to 0. -+void -+__wrap_setgrent(void) { -+ group_idx = 0; -+} -+ -+// This function returns the next group entry in the list of groups, or -+// NULL if there aren't any left. -+// group_idx is a global variable which keeps track of where you are in the list -+struct group * -+__wrap_getgrent(void) { -+ if(group_idx >= NUM_GROUPS) return NULL; -+ return &groups[group_idx++]; -+} -+ -+void -+__wrap_endgrent(void) { -+} -+ -+static void -+is_pcmk__is_user_in_group(void **state) -+{ -+ // null user -+ assert_false(pcmk__is_user_in_group(NULL, "grp0")); -+ // null group -+ assert_false(pcmk__is_user_in_group("user0", NULL)); -+ // nonexistent group -+ assert_false(pcmk__is_user_in_group("user0", "nonexistent_group")); -+ // user is in group -+ assert_true(pcmk__is_user_in_group("user0", "grp0")); -+ // user is not in group -+ assert_false(pcmk__is_user_in_group("user2", "grp0")); -+} -+ -+int -+main(int argc, char **argv) -+{ -+ const struct CMUnitTest tests[] = { -+ cmocka_unit_test(is_pcmk__is_user_in_group) -+ }; -+ -+ cmocka_set_message_output(CM_OUTPUT_TAP); -+ return cmocka_run_group_tests(tests, NULL, NULL); -+} --- -2.31.1 - - -From 1bb7fda60f5b8547d7457f20543b7e50089cf06b Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:17:36 -0400 -Subject: [PATCH 3/4] Add ACL group support - -closes T61 ---- - lib/common/acl.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/lib/common/acl.c b/lib/common/acl.c -index f68069bbd..d7f8469b1 100644 ---- a/lib/common/acl.c -+++ b/lib/common/acl.c -@@ -320,6 
+320,13 @@ pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user) - crm_debug("Unpacking ACLs for user '%s'", id); - p->acls = parse_acl_entry(acls, child, p->acls); - } -+ } else if (!strcmp(tag, XML_ACL_TAG_GROUP)) { -+ const char *id = crm_element_value(child, XML_ATTR_ID); -+ -+ if (id && pcmk__is_user_in_group(user,id)) { -+ crm_debug("Unpacking ACLs for group '%s'", id); -+ p->acls = parse_acl_entry(acls, child, p->acls); -+ } - } - } - } --- -2.31.1 - - -From f4efd55d9424d34908ba3e2bcffe16c00b2cf660 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:20:36 -0400 -Subject: [PATCH 4/4] Allow acl_target and acl_group elements to take a 'name' - attribute to use a name different from 'id' - -closes T60 ---- - include/crm/msg_xml.h | 1 + - lib/common/acl.c | 21 +++++++++++++++++---- - 2 files changed, 18 insertions(+), 4 deletions(-) - -diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h -index b36dcf060..6470520b1 100644 ---- a/include/crm/msg_xml.h -+++ b/include/crm/msg_xml.h -@@ -133,6 +133,7 @@ extern "C" { - # define XML_ATTR_VERSION "version" - # define XML_ATTR_DESC "description" - # define XML_ATTR_ID "id" -+# define XML_ATTR_NAME "name" - # define XML_ATTR_IDREF "id-ref" - # define XML_ATTR_ID_LONG "long-id" - # define XML_ATTR_TYPE "type" -diff --git a/lib/common/acl.c b/lib/common/acl.c -index d7f8469b1..b9f7472ee 100644 ---- a/lib/common/acl.c -+++ b/lib/common/acl.c -@@ -278,8 +278,13 @@ pcmk__apply_acl(xmlNode *xml) - - /*! 
- * \internal -- * \brief Unpack ACLs for a given user -- * -+ * \brief Unpack ACLs for a given user into the -+ * metadata of the target XML tree -+ * -+ * Taking the description of ACLs from the source XML tree and -+ * marking up the target XML tree with access information for the -+ * given user by tacking it onto the relevant nodes -+ * - * \param[in] source XML with ACL definitions - * \param[in,out] target XML that ACLs will be applied to - * \param[in] user Username whose ACLs need to be unpacked -@@ -314,14 +319,22 @@ pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user) - - if (!strcmp(tag, XML_ACL_TAG_USER) - || !strcmp(tag, XML_ACL_TAG_USERv1)) { -- const char *id = crm_element_value(child, XML_ATTR_ID); -+ const char *id = crm_element_value(child, XML_ATTR_NAME); -+ -+ if (id == NULL) { -+ id = crm_element_value(child, XML_ATTR_ID); -+ } - - if (id && strcmp(id, user) == 0) { - crm_debug("Unpacking ACLs for user '%s'", id); - p->acls = parse_acl_entry(acls, child, p->acls); - } - } else if (!strcmp(tag, XML_ACL_TAG_GROUP)) { -- const char *id = crm_element_value(child, XML_ATTR_ID); -+ const char *id = crm_element_value(child, XML_ATTR_NAME); -+ -+ if (id == NULL) { -+ id = crm_element_value(child, XML_ATTR_ID); -+ } - - if (id && pcmk__is_user_in_group(user,id)) { - crm_debug("Unpacking ACLs for group '%s'", id); --- -2.31.1 - diff --git a/SOURCES/002-remote-regression.patch b/SOURCES/002-remote-regression.patch new file mode 100644 index 0000000..0f0bea8 --- /dev/null +++ b/SOURCES/002-remote-regression.patch @@ -0,0 +1,98 @@ +From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 23 Jan 2023 14:25:56 -0600 +Subject: [PATCH] High: executor: fix regression in remote node shutdown + +This reverts the essential part of d61494347, which was based on misdiagnosing +a remote node shutdown issue. 
Initially, it was thought that a "TLS server +session ended" log just after a remote node requested shutdown indicated that +the proxy connection coincidentally dropped at that moment. It actually is the +routine stopping of accepting new proxy connections, and exiting when that +happens makes the remote node exit immediately without waiting for the +all-clear from the cluster. + +Fixes T361 +--- + daemons/execd/pacemaker-execd.c | 19 +------------------ + daemons/execd/pacemaker-execd.h | 3 +-- + daemons/execd/remoted_tls.c | 6 +----- + 3 files changed, 3 insertions(+), 25 deletions(-) + +diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c +index db12674f13..491808974a 100644 +--- a/daemons/execd/pacemaker-execd.c ++++ b/daemons/execd/pacemaker-execd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -305,23 +305,6 @@ lrmd_exit(gpointer data) + return FALSE; + } + +-/*! +- * \internal +- * \brief Clean up and exit if shutdown has started +- * +- * \return Doesn't return +- */ +-void +-execd_exit_if_shutting_down(void) +-{ +-#ifdef PCMK__COMPILE_REMOTE +- if (shutting_down) { +- crm_warn("exit because TLS connection was closed and 'shutting_down' set"); +- lrmd_exit(NULL); +- } +-#endif +-} +- + /*! + * \internal + * \brief Request cluster shutdown if appropriate, otherwise exit immediately +diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h +index 6646ae29e3..f78e8dcdde 100644 +--- a/daemons/execd/pacemaker-execd.h ++++ b/daemons/execd/pacemaker-execd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details.
+ * +@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp); + int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, + xmlNode *request); + void lrmd_drain_alerts(GMainLoop *mloop); +-void execd_exit_if_shutting_down(void); + + #endif // PACEMAKER_EXECD__H +diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c +index 6f4b2d0062..c65e3f394d 100644 +--- a/daemons/execd/remoted_tls.c ++++ b/daemons/execd/remoted_tls.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -250,10 +250,6 @@ static void + tls_server_dropped(gpointer user_data) + { + crm_notice("TLS server session ended"); +- /* If we are in the process of shutting down, then we should actually exit. +- * bz#1804259 +- */ +- execd_exit_if_shutting_down(); + return; + } + +-- +2.31.1 + diff --git a/SOURCES/003-history-cleanup.patch b/SOURCES/003-history-cleanup.patch new file mode 100644 index 0000000..87a3e27 --- /dev/null +++ b/SOURCES/003-history-cleanup.patch @@ -0,0 +1,2829 @@ +From e953591a9796edebd4796c344df0eddcbc7a2dff Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 30 Jan 2023 16:34:32 -0600 +Subject: [PATCH 01/14] Refactor: scheduler: drop unneeded arguments from + process_rsc_state() + +migrate_op has been unused since at least 2011 +--- + lib/pengine/unpack.c | 36 +++++++++++++++--------------------- + 1 file changed, 15 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 5fcba3b..9524def 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -1963,8 +1963,7 @@ process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t + + static void + process_rsc_state(pe_resource_t * rsc, pe_node_t * node, +- enum action_fail_response on_fail, +- xmlNode * migrate_op, pe_working_set_t * data_set) ++ 
enum action_fail_response on_fail) + { + pe_node_t *tmpnode = NULL; + char *reason = NULL; +@@ -2016,7 +2015,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + should_fence = TRUE; + +- } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { ++ } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + if (pe__is_remote_node(node) && node->details->remote_rsc + && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { + +@@ -2039,7 +2038,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + if (reason == NULL) { + reason = crm_strdup_printf("%s is thought to be active there", rsc->id); + } +- pe_fence_node(data_set, node, reason, FALSE); ++ pe_fence_node(rsc->cluster, node, reason, FALSE); + } + free(reason); + } +@@ -2069,7 +2068,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * but also mark the node as unclean + */ + reason = crm_strdup_printf("%s failed there", rsc->id); +- pe_fence_node(data_set, node, reason, FALSE); ++ pe_fence_node(rsc->cluster, node, reason, FALSE); + free(reason); + break; + +@@ -2090,7 +2089,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* make sure it comes up somewhere else + * or not at all + */ +- resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); ++ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", ++ rsc->cluster); + break; + + case action_fail_stop: +@@ -2112,8 +2112,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * container is running yet, so remember it and add a stop + * action for it later. 
+ */ +- data_set->stop_needed = g_list_prepend(data_set->stop_needed, +- rsc->container); ++ rsc->cluster->stop_needed = ++ g_list_prepend(rsc->cluster->stop_needed, rsc->container); + } else if (rsc->container) { + stop_action(rsc->container, node, FALSE); + } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { +@@ -2123,10 +2123,10 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + + case action_fail_reset_remote: + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +- if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { ++ if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + tmpnode = NULL; + if (rsc->is_remote_node) { +- tmpnode = pe_find_node(data_set->nodes, rsc->id); ++ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); + } + if (tmpnode && + pe__is_remote_node(tmpnode) && +@@ -2135,7 +2135,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* The remote connection resource failed in a way that + * should result in fencing the remote node. + */ +- pe_fence_node(data_set, tmpnode, ++ pe_fence_node(rsc->cluster, tmpnode, + "remote connection is unrecoverable", FALSE); + } + } +@@ -2158,7 +2158,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * result in a fencing operation regardless if we're going to attempt to + * reconnect to the remote-node in this transition or not. 
*/ + if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { +- tmpnode = pe_find_node(data_set->nodes, rsc->id); ++ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); + if (tmpnode && tmpnode->details->unclean) { + tmpnode->details->unseen = FALSE; + } +@@ -2177,7 +2177,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + } + } + +- native_add_running(rsc, node, data_set, (save_on_fail != action_fail_ignore)); ++ native_add_running(rsc, node, rsc->cluster, ++ (save_on_fail != action_fail_ignore)); + switch (on_fail) { + case action_fail_ignore: + break; +@@ -2376,14 +2377,12 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + int start_index = -1; + enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; + +- const char *task = NULL; + const char *rsc_id = ID(lrm_resource); + + pe_resource_t *rsc = NULL; + GList *op_list = NULL; + GList *sorted_op_list = NULL; + +- xmlNode *migrate_op = NULL; + xmlNode *rsc_op = NULL; + xmlNode *last_failure = NULL; + +@@ -2437,11 +2436,6 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { + xmlNode *rsc_op = (xmlNode *) gIter->data; + +- task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); +- if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { +- migrate_op = rsc_op; +- } +- + unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); + } + +@@ -2452,7 +2446,7 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + /* no need to free the contents */ + g_list_free(sorted_op_list); + +- process_rsc_state(rsc, node, on_fail, migrate_op, data_set); ++ process_rsc_state(rsc, node, on_fail); + + if (get_target_role(rsc, &req_role)) { + if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { +-- +2.31.1 + +From 6f4e34cccc4864961d2020a2dd547450ac53a44e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 16:30:20 -0600 +Subject: [PATCH 02/14] Log: scheduler: improve 
trace logs when unpacking + resource history + +--- + lib/pengine/unpack.c | 112 +++++++++++++++++++++++++++---------------- + 1 file changed, 71 insertions(+), 41 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 9524def..b7b2873 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -3363,6 +3363,24 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + pe__set_resource_flags(rsc, pe_rsc_block); + } + ++/*! ++ * \internal ++ * \brief Update an integer value and why ++ * ++ * \param[in,out] i Pointer to integer to update ++ * \param[in,out] why Where to store reason for update ++ * \param[in] value New value ++ * \param[in,out] reason Description of why value was changed ++ */ ++static inline void ++remap_because(int *i, const char **why, int value, const char *reason) ++{ ++ if (*i != value) { ++ *i = value; ++ *why = reason; ++ } ++} ++ + /*! + * \internal + * \brief Remap informational monitor results and operation status +@@ -3393,29 +3411,34 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + static void + remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + pe_working_set_t *data_set, enum action_fail_response *on_fail, +- int target_rc, int *rc, int *status) { ++ int target_rc, int *rc, int *status) ++{ + bool is_probe = false; ++ int orig_exit_status = *rc; ++ int orig_exec_status = *status; ++ const char *why = NULL; + const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + const char *key = get_op_key(xml_op); + const char *exit_reason = crm_element_value(xml_op, + XML_LRM_ATTR_EXIT_REASON); + + if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) { +- int remapped_rc = pcmk__effective_rc(*rc); +- +- if (*rc != remapped_rc) { +- crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc); ++ // Remap degraded results to their usual counterparts ++ *rc = pcmk__effective_rc(*rc); ++ if (*rc != orig_exit_status) { ++ why = 
"degraded monitor result"; + if (!node->details->shutdown || node->details->online) { + record_failed_op(xml_op, node, rsc, data_set); + } +- +- *rc = remapped_rc; + } + } + + if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { +- *status = PCMK_EXEC_DONE; +- *rc = PCMK_OCF_NOT_RUNNING; ++ if ((*status != PCMK_EXEC_DONE) || (*rc != PCMK_OCF_NOT_RUNNING)) { ++ *status = PCMK_EXEC_DONE; ++ *rc = PCMK_OCF_NOT_RUNNING; ++ why = "irrelevant probe result"; ++ } + } + + /* If the executor reported an operation status of anything but done or +@@ -3423,22 +3446,19 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + * it should be treated as a failure or not, because we know the expected + * result. + */ +- if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) { +- return; ++ switch (*status) { ++ case PCMK_EXEC_DONE: ++ case PCMK_EXEC_ERROR: ++ break; ++ default: ++ goto remap_done; + } + +- CRM_ASSERT(rsc); +- CRM_CHECK(task != NULL, +- *status = PCMK_EXEC_ERROR; return); +- +- *status = PCMK_EXEC_DONE; +- + if (exit_reason == NULL) { + exit_reason = ""; + } + + is_probe = pcmk_xe_is_probe(xml_op); +- + if (is_probe) { + task = "probe"; + } +@@ -3452,12 +3472,15 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + * those versions or processing of saved CIB files from those versions, + * so we do not need to care much about this case. 
+ */ +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, "obsolete history format"); + crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", + key, pe__node_name(node)); + +- } else if (target_rc != *rc) { +- *status = PCMK_EXEC_ERROR; ++ } else if (*rc == target_rc) { ++ remap_because(status, &why, PCMK_EXEC_DONE, "expected result"); ++ ++ } else { ++ remap_because(status, &why, PCMK_EXEC_ERROR, "unexpected result"); + pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", + key, pe__node_name(node), + target_rc, services_ocf_exitcode_str(target_rc), +@@ -3468,7 +3491,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + switch (*rc) { + case PCMK_OCF_OK: + if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why,PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, pe__node_name(node), + last_change_str(xml_op)); +@@ -3479,7 +3502,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + if (is_probe || (target_rc == *rc) + || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why, PCMK_EXEC_DONE, "exit status"); + rsc->role = RSC_ROLE_STOPPED; + + /* clear any previous failure actions */ +@@ -3490,7 +3513,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + + case PCMK_OCF_RUNNING_PROMOTED: + if (is_probe && (*rc != target_rc)) { +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, + "Probe found %s active and promoted on %s at %s", + rsc->id, pe__node_name(node), +@@ -3502,11 +3525,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + case PCMK_OCF_DEGRADED_PROMOTED: + case PCMK_OCF_FAILED_PROMOTED: + rsc->role = RSC_ROLE_PROMOTED; +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, "exit 
status"); + break; + + case PCMK_OCF_NOT_CONFIGURED: +- *status = PCMK_EXEC_ERROR_FATAL; ++ remap_because(status, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); + break; + + case PCMK_OCF_UNIMPLEMENT_FEATURE: +@@ -3517,9 +3540,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + + if (interval_ms == 0) { + check_recoverable(rsc, node, task, *rc, xml_op); +- *status = PCMK_EXEC_ERROR_HARD; ++ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, ++ "exit status"); + } else { +- *status = PCMK_EXEC_NOT_SUPPORTED; ++ remap_because(status, &why, PCMK_EXEC_NOT_SUPPORTED, ++ "exit status"); + } + } + break; +@@ -3528,7 +3553,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + case PCMK_OCF_INVALID_PARAM: + case PCMK_OCF_INSUFFICIENT_PRIV: + check_recoverable(rsc, node, task, *rc, xml_op); +- *status = PCMK_EXEC_ERROR_HARD; ++ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, "exit status"); + break; + + default: +@@ -3537,13 +3562,21 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + "on %s at %s as failure", + *rc, task, rsc->id, pe__node_name(node), + last_change_str(xml_op)); +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, ++ "unknown exit status"); + } + break; + } + +- pe_rsc_trace(rsc, "Remapped %s status to '%s'", +- key, pcmk_exec_status_str(*status)); ++remap_done: ++ if (why != NULL) { ++ pe_rsc_trace(rsc, ++ "Remapped %s result from [%s: %s] to [%s: %s] " ++ "because of %s", ++ key, pcmk_exec_status_str(orig_exec_status), ++ crm_exit_str(orig_exit_status), ++ pcmk_exec_status_str(*status), crm_exit_str(*rc), why); ++ } + } + + // return TRUE if start or monitor last failure but parameters changed +@@ -3947,9 +3980,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + parent = uber_parent(rsc); + } + +- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", +- task_key, task, task_id, status, rc, 
pe__node_name(node), +- role2text(rsc->role)); ++ pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", ++ ID(xml_op), task, task_id, pe__node_name(node), ++ pcmk_exec_status_str(status), crm_exit_str(rc)); + + if (node->details->unclean) { + pe_rsc_trace(rsc, +@@ -4077,9 +4110,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + goto done; + + case PCMK_EXEC_DONE: +- pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", +- task, rsc->id, pe__node_name(node), +- last_change_str(xml_op), ID(xml_op)); + update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); + goto done; + +@@ -4175,9 +4205,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + } + + done: +- pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", +- rsc->id, task, role2text(rsc->role), +- role2text(rsc->next_role)); ++ pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", ++ rsc->id, pe__node_name(node), ID(xml_op), ++ role2text(rsc->role), role2text(rsc->next_role)); + } + + static void +-- +2.31.1 + +From 5a1d2a3ba58fa73225433dab40cee0a6e0ef9bda Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 12:08:55 -0600 +Subject: [PATCH 03/14] Low: scheduler: improve migration history validation + +Instead of a simple CRM_CHECK(), functionize parsing the source and target node +names from a migration action's resource history entry. This reduces +duplication and allows us to log more helpful errors. + +Also, CRM_CHECK() tries to dump core for debugging, and that's not helpful for +corrupted CIB entries. 
+--- + lib/pengine/unpack.c | 87 ++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 75 insertions(+), 12 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index b7b2873..cd1b038 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2786,6 +2786,60 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, + || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, + data_set); + } ++ ++/*! ++ * \internal ++ * \brief Parse migration source and target node names from history entry ++ * ++ * \param[in] entry Resource history entry for a migration action ++ * \param[in] source_node If not NULL, source must match this node ++ * \param[in] target_node If not NULL, target must match this node ++ * \param[out] source_name Where to store migration source node name ++ * \param[out] target_name Where to store migration target node name ++ * ++ * \return Standard Pacemaker return code ++ */ ++static int ++get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, ++ const pe_node_t *target_node, ++ const char **source_name, const char **target_name) ++{ ++ const char *id = ID(entry); ++ ++ if (id == NULL) { ++ crm_err("Ignoring resource history entry without ID"); ++ return pcmk_rc_unpack_error; ++ } ++ ++ *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); ++ *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); ++ if ((*source_name == NULL) || (*target_name == NULL)) { ++ crm_err("Ignoring resource history entry %s without " ++ XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, ++ id); ++ return pcmk_rc_unpack_error; ++ } ++ ++ if ((source_node != NULL) ++ && !pcmk__str_eq(*source_name, source_node->details->uname, ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ crm_err("Ignoring resource history entry %s because " ++ XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", ++ id, pcmk__s(*source_name, ""), pe__node_name(source_node)); ++ 
return pcmk_rc_unpack_error; ++ } ++ ++ if ((target_node != NULL) ++ && !pcmk__str_eq(*target_name, target_node->details->uname, ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ crm_err("Ignoring resource history entry %s because " ++ XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", ++ id, pcmk__s(*target_name, ""), pe__node_name(target_node)); ++ return pcmk_rc_unpack_error; ++ } ++ ++ return pcmk_rc_ok; ++} + + static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +@@ -2834,13 +2888,16 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_node_t *target_node = NULL; + pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + bool source_newer_op = false; + bool target_newer_state = false; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, node, NULL, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If there's any newer non-monitor operation on the source, this migrate_to + * potentially no longer matters for the source. 
+@@ -2949,11 +3006,14 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) + { + xmlNode *target_migrate_from = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, node, NULL, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If a migration failed, we have to assume the resource is active. Clones + * are not allowed to migrate, so role can't be promoted. +@@ -3001,11 +3061,14 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, + xmlNode *xml_op, pe_working_set_t *data_set) + { + xmlNode *source_migrate_to = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, NULL, node, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If a migration failed, we have to assume the resource is active. Clones + * are not allowed to migrate, so role can't be promoted. +-- +2.31.1 + +From 5139e5369769e733b05bc28940d3dccb4f7fca95 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 14:30:16 -0600 +Subject: [PATCH 04/14] Refactor: scheduler: functionize adding a dangling + migration + +... 
for code isolation and readability +--- + lib/pengine/unpack.c | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index cd1b038..fa7c2cc 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2841,6 +2841,28 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + return pcmk_rc_ok; + } + ++/* ++ * \internal ++ * \brief Add a migration source to a resource's list of dangling migrations ++ * ++ * If the migrate_to and migrate_from actions in a live migration both ++ * succeeded, but there is no stop on the source, the migration is considered ++ * "dangling." Add the source to the resource's dangling migration list, which ++ * will be used to schedule a stop on the source without affecting the target. ++ * ++ * \param[in,out] rsc Resource involved in migration ++ * \param[in] node Migration source ++ */ ++static void ++add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) ++{ ++ pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", ++ rsc->id, pe__node_name(node)); ++ rsc->role = RSC_ROLE_STOPPED; ++ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, ++ (gpointer) node); ++} ++ + static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) +@@ -2941,14 +2963,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + if (migrate_from && from_rc == PCMK_OCF_OK + && (from_status == PCMK_EXEC_DONE)) { +- /* The migrate_to and migrate_from both succeeded, so mark the migration +- * as "dangling". This will be used to schedule a stop action on the +- * source without affecting the target. 
+- */ +- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), +- source); +- rsc->role = RSC_ROLE_STOPPED; +- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); ++ add_dangling_migration(rsc, node); + + } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed + /* If the resource has newer state on the target, this migrate_to no +-- +2.31.1 + +From da71c04463d31338dd5da54d1d48b53e413716dc Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 16:57:55 -0600 +Subject: [PATCH 05/14] Refactor: scheduler: check for dangling migration + before setting role + +Previously, unpack_migrate_to_success() set rsc->role = RSC_ROLE_STARTED +then checked for dangling migration, which would reset it to RSC_ROLE_STOPPED. + +For clarity, do the dangling migration check first. +--- + lib/pengine/unpack.c | 47 ++++++++++++++++++++++++-------------------- + 1 file changed, 26 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index fa7c2cc..b858b59 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2905,8 +2905,8 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * migration is considered to be "dangling". Schedule a stop on the source + * in this case. + */ +- int from_rc = 0; +- int from_status = 0; ++ int from_rc = PCMK_OCF_OK; ++ int from_status = PCMK_EXEC_PENDING; + pe_node_t *target_node = NULL; + pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; +@@ -2930,12 +2930,17 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + // Check whether there was a migrate_from action on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, data_set); +- +- /* Even if there's a newer non-monitor operation on the source, we still +- * need to check how this migrate_to might matter for the target. 
+- */ +- if (source_newer_op && migrate_from) { +- return; ++ if (migrate_from != NULL) { ++ if (source_newer_op) { ++ /* There's a newer non-monitor operation on the source and a ++ * migrate_from on the target, so this migrate_to is irrelevant to ++ * the resource's state. ++ */ ++ return; ++ } ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, ++ &from_status); + } + + /* If the resource has newer state on the target after the migration +@@ -2948,24 +2953,24 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + return; + } + +- // Clones are not allowed to migrate, so role can't be promoted ++ /* Check for dangling migration (migrate_from succeeded but stop not done). ++ * We know there's no stop because we already returned if the target has a ++ * migrate_from and the source has any newer non-monitor operation. ++ */ ++ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { ++ add_dangling_migration(rsc, node); ++ return; ++ } ++ ++ /* Without newer state, this migrate_to implies the resource is active. ++ * (Clones are not allowed to migrate, so role can't be promoted.) 
++ */ + rsc->role = RSC_ROLE_STARTED; + + target_node = pe_find_node(data_set->nodes, target); + source_node = pe_find_node(data_set->nodes, source); + +- if (migrate_from) { +- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); +- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); +- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", +- ID(migrate_from), target, from_status, from_rc); +- } +- +- if (migrate_from && from_rc == PCMK_OCF_OK +- && (from_status == PCMK_EXEC_DONE)) { +- add_dangling_migration(rsc, node); +- +- } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed ++ if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target + /* If the resource has newer state on the target, this migrate_to no + * longer matters for the target. + */ +-- +2.31.1 + +From d98a2687d68747b0598554939dea05c420456a12 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:05:50 -0600 +Subject: [PATCH 06/14] Refactor: scheduler: avoid duplication of + active-on-target check + +--- + lib/pengine/unpack.c | 24 ++++++------------------ + 1 file changed, 6 insertions(+), 18 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index b858b59..8cfc0ef 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2914,6 +2914,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + const char *target = NULL; + bool source_newer_op = false; + bool target_newer_state = false; ++ bool active_on_target = false; + + // Get source and target node names from XML + if (get_migration_node_names(xml_op, node, NULL, &source, +@@ -2969,23 +2970,14 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + target_node = pe_find_node(data_set->nodes, target); + source_node = pe_find_node(data_set->nodes, source); ++ active_on_target = !target_newer_state && (target_node != NULL) ++ && target_node->details->online; + + if 
(from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target +- /* If the resource has newer state on the target, this migrate_to no +- * longer matters for the target. +- */ +- if (!target_newer_state +- && target_node && target_node->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, +- target_node->details->online); ++ if (active_on_target) { + native_add_running(rsc, target_node, data_set, TRUE); +- + } else { +- /* With the earlier bail logic, migrate_from != NULL here implies +- * source_newer_op is false, meaning this migrate_to still matters +- * for the source. +- * Consider it failed here - forces a restart, prevents migration +- */ ++ // Mark resource as failed, require recovery, and prevent migration + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } +@@ -2994,11 +2986,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + /* If the resource has newer state on the target, this migrate_to no + * longer matters for the target. + */ +- if (!target_newer_state +- && target_node && target_node->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, +- target_node->details->online); +- ++ if (active_on_target) { + native_add_running(rsc, target_node, data_set, FALSE); + if (source_node && source_node->details->online) { + /* This is a partial migration: the migrate_to completed +-- +2.31.1 + +From ae145309e3fdb26608e99f6d1fe1a7859d98efd0 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:07:58 -0600 +Subject: [PATCH 07/14] Refactor: scheduler: improve unpacking of successful + migrate_to + +Improve log messages, comments, and formatting, and avoid doing things until +needed, to improve efficiency of early returns. 
+--- + lib/pengine/unpack.c | 109 +++++++++++++++++++------------------------ + 1 file changed, 48 insertions(+), 61 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8cfc0ef..224b7b5 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2867,48 +2867,40 @@ static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) + { +- /* A successful migration sequence is: +- * migrate_to on source node +- * migrate_from on target node +- * stop on source node ++ /* A complete migration sequence is: ++ * 1. migrate_to on source node (which succeeded if we get to this function) ++ * 2. migrate_from on target node ++ * 3. stop on source node + * +- * But there could be scenarios like (It's easier to produce with cluster +- * property batch-limit=1): +- * +- * - rscA is live-migrating from node1 to node2. +- * +- * - Before migrate_to on node1 returns, put node2 into standby. +- * +- * - Transition aborts upon return of successful migrate_to on node1. New +- * transition is going to stop the rscA on both nodes and start it on +- * node1. ++ * If no migrate_from has happened, the migration is considered to be ++ * "partial". If the migrate_from succeeded but no stop has happened, the ++ * migration is considered to be "dangling". + * +- * - While it is stopping on node1, run something that is going to make +- * the transition abort again like: +- * crm_resource --resource rscA --ban --node node2 ++ * If a successful migrate_to and stop have happened on the source node, we ++ * still need to check for a partial migration, due to scenarios (easier to ++ * produce with batch-limit=1) like: + * +- * - Transition aborts upon return of stop on node1. ++ * - A resource is migrating from node1 to node2, and a migrate_to is ++ * initiated for it on node1. + * +- * Now although there's a stop on node1, it's still a partial migration and +- * rscA is still potentially active on node2. 
++ * - node2 goes into standby mode while the migrate_to is pending, which ++ * aborts the transition. + * +- * So even if a migrate_to is followed by a stop, we still need to check +- * whether there's a corresponding migrate_from or any newer operation on +- * the target. ++ * - Upon completion of the migrate_to, a new transition schedules a stop ++ * on both nodes and a start on node1. + * +- * If no migrate_from has happened, the migration is considered to be +- * "partial". If the migrate_from failed, make sure the resource gets +- * stopped on both source and target (if up). ++ * - If the new transition is aborted for any reason while the resource is ++ * stopping on node1, the transition after that stop completes will see ++ * the migrate_from and stop on the source, but it's still a partial ++ * migration, and the resource must be stopped on node2 because it is ++ * potentially active there due to the migrate_to. + * +- * If the migrate_to and migrate_from both succeeded (which also implies the +- * resource is no longer running on the source), but there is no stop, the +- * migration is considered to be "dangling". Schedule a stop on the source +- * in this case. ++ * We also need to take into account that either node's history may be ++ * cleared at any point in the migration process. + */ + int from_rc = PCMK_OCF_OK; + int from_status = PCMK_EXEC_PENDING; + pe_node_t *target_node = NULL; +- pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; + const char *source = NULL; + const char *target = NULL; +@@ -2922,13 +2914,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + return; + } + +- /* If there's any newer non-monitor operation on the source, this migrate_to +- * potentially no longer matters for the source. 
+- */ ++ // Check for newer state on the source + source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, + data_set); + +- // Check whether there was a migrate_from action on the target ++ // Check for a migrate_from action from this source on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, data_set); + if (migrate_from != NULL) { +@@ -2944,12 +2934,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + &from_status); + } + +- /* If the resource has newer state on the target after the migration +- * events, this migrate_to no longer matters for the target. ++ /* If the resource has newer state on both the source and target after the ++ * migration events, this migrate_to is irrelevant to the resource's state. + */ + target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, + migrate_from, data_set); +- + if (source_newer_op && target_newer_state) { + return; + } +@@ -2969,7 +2958,6 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + rsc->role = RSC_ROLE_STARTED; + + target_node = pe_find_node(data_set->nodes, target); +- source_node = pe_find_node(data_set->nodes, source); + active_on_target = !target_newer_state && (target_node != NULL) + && target_node->details->online; + +@@ -2981,31 +2969,30 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } ++ return; ++ } + +- } else { // Pending, or complete but erased +- /* If the resource has newer state on the target, this migrate_to no +- * longer matters for the target. 
+- */ +- if (active_on_target) { +- native_add_running(rsc, target_node, data_set, FALSE); +- if (source_node && source_node->details->online) { +- /* This is a partial migration: the migrate_to completed +- * successfully on the source, but the migrate_from has not +- * completed. Remember the source and target; if the newly +- * chosen target remains the same when we schedule actions +- * later, we may continue with the migration. +- */ +- rsc->partial_migration_target = target_node; +- rsc->partial_migration_source = source_node; +- } +- } else if (!source_newer_op) { +- /* This migrate_to matters for the source only if it's the last +- * non-monitor operation here. +- * Consider it failed here - forces a restart, prevents migration ++ // The migrate_from is pending, complete but erased, or to be scheduled ++ ++ if (active_on_target) { ++ pe_node_t *source_node = pe_find_node(data_set->nodes, source); ++ ++ native_add_running(rsc, target_node, data_set, FALSE); ++ if ((source_node != NULL) && source_node->details->online) { ++ /* This is a partial migration: the migrate_to completed ++ * successfully on the source, but the migrate_from has not ++ * completed. Remember the source and target; if the newly ++ * chosen target remains the same when we schedule actions ++ * later, we may continue with the migration. 
+ */ +- pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +- pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); ++ rsc->partial_migration_target = target_node; ++ rsc->partial_migration_source = source_node; + } ++ ++ } else if (!source_newer_op) { ++ // Mark resource as failed, require recovery, and prevent migration ++ pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); ++ pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } + } + +-- +2.31.1 + +From 7d63ed8d52f64d2523367cff36bf77bd85296bd9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:14:57 -0600 +Subject: [PATCH 08/14] Refactor: scheduler: drop redundant argument from + unpack_migrate_to_success() + +--- + lib/pengine/unpack.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 224b7b5..6222115 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2864,8 +2864,7 @@ add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) + } + + static void +-unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +- pe_working_set_t *data_set) ++unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) + { + /* A complete migration sequence is: + * 1. 
migrate_to on source node (which succeeded if we get to this function) +@@ -2916,11 +2915,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + // Check for newer state on the source + source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, +- data_set); ++ rsc->cluster); + + // Check for a migrate_from action from this source on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, +- source, -1, data_set); ++ source, -1, rsc->cluster); + if (migrate_from != NULL) { + if (source_newer_op) { + /* There's a newer non-monitor operation on the source and a +@@ -2938,7 +2937,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * migration events, this migrate_to is irrelevant to the resource's state. + */ + target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, +- migrate_from, data_set); ++ migrate_from, rsc->cluster); + if (source_newer_op && target_newer_state) { + return; + } +@@ -2957,13 +2956,13 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + */ + rsc->role = RSC_ROLE_STARTED; + +- target_node = pe_find_node(data_set->nodes, target); ++ target_node = pe_find_node(rsc->cluster->nodes, target); + active_on_target = !target_newer_state && (target_node != NULL) + && target_node->details->online; + + if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target + if (active_on_target) { +- native_add_running(rsc, target_node, data_set, TRUE); ++ native_add_running(rsc, target_node, rsc->cluster, TRUE); + } else { + // Mark resource as failed, require recovery, and prevent migration + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +@@ -2975,9 +2974,9 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + // The migrate_from is pending, complete but erased, or to be scheduled + + if (active_on_target) { +- pe_node_t *source_node = pe_find_node(data_set->nodes, 
source); ++ pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + +- native_add_running(rsc, target_node, data_set, FALSE); ++ native_add_running(rsc, target_node, rsc->cluster, FALSE); + if ((source_node != NULL) && source_node->details->online) { + /* This is a partial migration: the migrate_to completed + * successfully on the source, but the migrate_from has not +@@ -3946,7 +3945,7 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c + clear_past_failure = TRUE; + + } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { +- unpack_migrate_to_success(rsc, node, xml_op, data_set); ++ unpack_migrate_to_success(rsc, node, xml_op); + + } else if (rsc->role < RSC_ROLE_STARTED) { + pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node)); +-- +2.31.1 + +From 3be487f87bf5e26277379148922525fd98d29681 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 09:13:30 -0600 +Subject: [PATCH 09/14] Doc: scheduler: clarify comments about unpacking + migration history + +per review +--- + lib/pengine/unpack.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 6222115..ec2cf26 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2791,9 +2791,9 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, + * \internal + * \brief Parse migration source and target node names from history entry + * +- * \param[in] entry Resource history entry for a migration action +- * \param[in] source_node If not NULL, source must match this node +- * \param[in] target_node If not NULL, target must match this node ++ * \param[in] entry Resource history entry for a migration action ++ * \param[in] source_node If not NULL, source must match this node ++ * \param[in] target_node If not NULL, target must match this node + * \param[out] source_name Where to store migration source node name + * 
\param[out] target_name Where to store migration target node name + * +@@ -2825,7 +2825,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + pcmk__str_casei|pcmk__str_null_matches)) { + crm_err("Ignoring resource history entry %s because " + XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", +- id, pcmk__s(*source_name, ""), pe__node_name(source_node)); ++ id, *source_name, pe__node_name(source_node)); + return pcmk_rc_unpack_error; + } + +@@ -2834,7 +2834,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + pcmk__str_casei|pcmk__str_null_matches)) { + crm_err("Ignoring resource history entry %s because " + XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", +- id, pcmk__s(*target_name, ""), pe__node_name(target_node)); ++ id, *target_name, pe__node_name(target_node)); + return pcmk_rc_unpack_error; + } + +@@ -2890,7 +2890,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) + * + * - If the new transition is aborted for any reason while the resource is + * stopping on node1, the transition after that stop completes will see +- * the migrate_from and stop on the source, but it's still a partial ++ * the migrate_to and stop on the source, but it's still a partial + * migration, and the resource must be stopped on node2 because it is + * potentially active there due to the migrate_to. 
+ * +@@ -3425,9 +3425,9 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + * \brief Update an integer value and why + * + * \param[in,out] i Pointer to integer to update +- * \param[in,out] why Where to store reason for update ++ * \param[out] why Where to store reason for update + * \param[in] value New value +- * \param[in,out] reason Description of why value was changed ++ * \param[in] reason Description of why value was changed + */ + static inline void + remap_because(int *i, const char **why, int value, const char *reason) +@@ -3456,7 +3456,7 @@ remap_because(int *i, const char **why, int value, const char *reason) + * \param[in] data_set Current cluster working set + * \param[in,out] on_fail What should be done about the result + * \param[in] target_rc Expected return code of operation +- * \param[in,out] rc Actual return code of operation ++ * \param[in,out] rc Actual return code of operation (treated as OCF) + * \param[in,out] status Operation execution status + * + * \note If the result is remapped and the node is not shutting down or failed, +@@ -3548,7 +3548,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + switch (*rc) { + case PCMK_OCF_OK: + if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { +- remap_because(status, &why,PCMK_EXEC_DONE, "probe"); ++ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, pe__node_name(node), + last_change_str(xml_op)); +-- +2.31.1 + +From 3ef6c84a7b0dd434731e72d91f2724bdb52e292e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 09:42:01 -0600 +Subject: [PATCH 10/14] Refactor: scheduler: improve xpath efficiency when + unpacking + +Using "//" means that every child must be searched recursively. If we know the +exact path, we should explicitly specify it. 
+--- + lib/pengine/unpack.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index ec2cf26..8aead58 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2571,6 +2571,13 @@ set_node_score(gpointer key, gpointer value, gpointer user_data) + node->weight = *score; + } + ++#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ ++ "/" XML_CIB_TAG_STATE ++#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ ++ "/" XML_LRM_TAG_RESOURCES \ ++ "/" XML_LRM_TAG_RESOURCE ++#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP ++ + static xmlNode * + find_lrm_op(const char *resource, const char *op, const char *node, const char *source, + int target_rc, pe_working_set_t *data_set) +@@ -2583,10 +2590,9 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * + + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, +- "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='", node, "']" +- "//" XML_LRM_TAG_RESOURCE +- "[@" XML_ATTR_ID "='", resource, "']" +- "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" ++ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", + NULL); + + /* Need to check against transition_magic too? 
*/ +@@ -2631,10 +2637,8 @@ find_lrm_resource(const char *rsc_id, const char *node_name, + + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, +- "//" XML_CIB_TAG_STATE +- "[@" XML_ATTR_UNAME "='", node_name, "']" +- "//" XML_LRM_TAG_RESOURCE +- "[@" XML_ATTR_ID "='", rsc_id, "']", ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", + NULL); + + xml = get_xpath_object((const char *) xpath->str, data_set->input, +-- +2.31.1 + +From 1869f99bc8eeedb976f96f0f1cc3d4dd86735504 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 10:25:53 -0600 +Subject: [PATCH 11/14] Low: scheduler: unknown_on_node() should ignore pending + actions + +Previously, unknown_on_node() looked for any lrm_rsc_op at all to decide +whether a resource is known on a node. However if the only action is pending, +the resource is not yet known. + +Also drop a redundant argument and add a doxygen block. (The rsc argument is +not const due to a getDocPtr() call in the chain, as well as libxml2 calls that +are likely const in practice but aren't marked as such.) +--- + lib/pengine/unpack.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8aead58..14dc202 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2648,19 +2648,32 @@ find_lrm_resource(const char *rsc_id, const char *node_name, + return xml; + } + ++/*! 
++ * \internal ++ * \brief Check whether a resource has no completed action history on a node ++ * ++ * \param[in,out] rsc Resource to check ++ * \param[in] node_name Node to check ++ * ++ * \return true if \p rsc_id is unknown on \p node_name, otherwise false ++ */ + static bool +-unknown_on_node(const char *rsc_id, const char *node_name, +- pe_working_set_t *data_set) ++unknown_on_node(pe_resource_t *rsc, const char *node_name) + { +- xmlNode *lrm_resource = NULL; +- +- lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); ++ bool result = false; ++ xmlXPathObjectPtr search; ++ GString *xpath = g_string_sized_new(256); + +- /* If the resource has no lrm_rsc_op history on the node, that means its +- * state is unknown there. +- */ +- return (lrm_resource == NULL +- || first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP) == NULL); ++ pcmk__g_strcat(xpath, ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" ++ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", ++ NULL); ++ search = xpath_search(rsc->cluster->input, (const char *) xpath->str); ++ result = (numXpathResults(search) == 0); ++ freeXpathObject(search); ++ g_string_free(xpath, TRUE); ++ return result; + } + + /*! +@@ -3027,7 +3040,7 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * Don't just consider it running there. We will get back here anyway in + * case the probe detects it's running there. + */ +- !unknown_on_node(rsc->id, target, data_set) ++ !unknown_on_node(rsc, target) + /* If the resource has newer state on the target after the migration + * events, this migrate_to no longer matters for the target. + */ +@@ -3082,7 +3095,7 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, + * Don't just consider it running there. We will get back here anyway in + * case the probe detects it's running there. 
+ */ +- !unknown_on_node(rsc->id, source, data_set) ++ !unknown_on_node(rsc, source) + /* If the resource has newer state on the source after the migration + * events, this migrate_from no longer matters for the source. + */ +-- +2.31.1 + +From 22fbab8e0d449d2accb231dfcec94294ded27f4e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 12:11:19 -0600 +Subject: [PATCH 12/14] Test: scheduler: add regression test for migration + intermediary + +As of this commit, the cluster wrongly restarts the migrated resource +--- + cts/cts-scheduler.in | 3 + + .../dot/migration-intermediary-cleaned.dot | 46 ++ + .../exp/migration-intermediary-cleaned.exp | 316 +++++++++++ + .../migration-intermediary-cleaned.scores | 201 +++++++ + .../migration-intermediary-cleaned.summary | 94 ++++ + .../xml/migration-intermediary-cleaned.xml | 513 ++++++++++++++++++ + 6 files changed, 1173 insertions(+) + create mode 100644 cts/scheduler/dot/migration-intermediary-cleaned.dot + create mode 100644 cts/scheduler/exp/migration-intermediary-cleaned.exp + create mode 100644 cts/scheduler/scores/migration-intermediary-cleaned.scores + create mode 100644 cts/scheduler/summary/migration-intermediary-cleaned.summary + create mode 100644 cts/scheduler/xml/migration-intermediary-cleaned.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index feb5dc8..9899c36 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -387,6 +387,9 @@ TESTS = [ + [ "probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins" ], + [ "probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed" ], + [ "partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration" ], ++ [ "migration-intermediary-cleaned", ++ "Probe live-migration intermediary with no history" ++ ], + [ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ], + ], + [ +diff --git 
a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot +new file mode 100644 +index 0000000..09568d0 +--- /dev/null ++++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot +@@ -0,0 +1,46 @@ ++ digraph "g" { ++"Connectivity_running_0" [ style=bold color="green" fontcolor="orange"] ++"Connectivity_start_0" -> "Connectivity_running_0" [ style = bold] ++"Connectivity_start_0" -> "ping-1_start_0 rhel8-2" [ style = bold] ++"Connectivity_start_0" [ style=bold color="green" fontcolor="orange"] ++"FencingFail_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] ++"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] ++"ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_monitor_60000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_start_0 rhel8-2" -> "Connectivity_running_0" [ style = bold] ++"ping-1_start_0 rhel8-2" -> "ping-1_monitor_60000 rhel8-2" [ style = 
bold] ++"ping-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"r192.168.122.207_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"r192.168.122.208_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_monitor_0 rhel8-2" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_start_0 rhel8-2" -> "rsc_rhel8-1_monitor_5000 rhel8-2" [ style = bold] ++"rsc_rhel8-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_stop_0 rhel8-3" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-1_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_monitor_0 rhel8-2" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-2_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_start_0 rhel8-2" -> "rsc_rhel8-2_monitor_5000 rhel8-2" [ style = bold] ++"rsc_rhel8-2_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_stop_0 rhel8-4" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-2_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-3_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-4_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-5_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"stateful-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp +new file mode 100644 +index 0000000..28fa776 +--- /dev/null ++++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp +@@ -0,0 +1,316 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores +new file mode 100644 +index 0000000..b3b8dff +--- /dev/null ++++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores +@@ -0,0 +1,201 @@ ++ ++pcmk__clone_allocate: Connectivity allocation score on rhel8-1: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-2: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-3: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-4: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-3: 1 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-2: 0 
++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-4: 1 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-5: 1 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-3: 11 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:1 allocation score on 
rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-4: 6 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-5: 6 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-3: 10 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-4: 5 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-5: 5 ++pcmk__group_assign: group-1 allocation score on rhel8-1: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-2: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-3: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-4: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-5: 0 ++pcmk__group_assign: petulant allocation score on rhel8-1: 0 ++pcmk__group_assign: petulant allocation score on rhel8-2: 0 ++pcmk__group_assign: petulant allocation score on rhel8-3: 0 ++pcmk__group_assign: petulant allocation score on rhel8-4: 0 ++pcmk__group_assign: petulant allocation score on rhel8-5: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-1: 0 ++pcmk__group_assign: 
r192.168.122.207 allocation score on rhel8-2: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-3: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-4: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-5: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-1: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-2: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-3: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-4: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-1: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-2: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-3: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-1: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-2: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-3: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-4: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-5: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-1: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-2: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-3: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-4: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-5: 0 ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-3: 0 ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-5: -INFINITY 
++pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-5: 0 ++pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 ++pcmk__primitive_assign: petulant allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-3: 1 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-4: 1 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-5: 1 ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on 
rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-3: 11 ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-1: 100 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-2: 100 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-3 
allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-3: 100 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-4: 100 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-5: 100 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-3: 11 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-4: 6 ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-3: -INFINITY 
++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-5: 6 ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-5: -INFINITY ++stateful-1:0 promotion score on rhel8-3: 10 ++stateful-1:1 promotion score on rhel8-4: 5 ++stateful-1:2 promotion score on rhel8-5: 5 ++stateful-1:3 promotion score on none: 0 ++stateful-1:4 promotion score on none: 0 +diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary +new file mode 100644 +index 0000000..5de1355 +--- /dev/null ++++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary +@@ -0,0 +1,94 @@ ++Using the original execution date of: 2023-01-19 21:05:59Z ++Current cluster status: ++ * Node List: ++ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * OFFLINE: [ rhel8-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-3 ++ * FencingPass (stonith:fence_dummy): Started rhel8-4 ++ * FencingFail (stonith:fence_dummy): Started rhel8-5 ++ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-4 
(ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 ++ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] ++ * Clone Set: Connectivity [ping-1]: ++ * Started: [ rhel8-3 rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Clone Set: promotable-1 [stateful-1] (promotable): ++ * Promoted: [ rhel8-3 ] ++ * Unpromoted: [ rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Resource Group: group-1: ++ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 ++ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 ++ ++Transition Summary: ++ * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) ++ * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) ++ * Restart migrator ( rhel8-5 ) ++ * Start ping-1:3 ( rhel8-2 ) ++ ++Executing Cluster Transition: ++ * Resource action: Fencing monitor on rhel8-2 ++ * Resource action: FencingPass monitor on rhel8-2 ++ * Resource action: FencingFail monitor on rhel8-2 ++ * Resource action: rsc_rhel8-1 stop on rhel8-3 ++ * Resource action: rsc_rhel8-1 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-2 stop on rhel8-4 ++ * Resource action: rsc_rhel8-2 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-3 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-4 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-5 monitor on rhel8-2 ++ * Resource action: migrator stop on rhel8-2 ++ * Resource action: migrator stop on rhel8-5 ++ * Resource action: migrator monitor on rhel8-2 ++ * Resource action: ping-1 monitor on rhel8-2 ++ * Pseudo action: Connectivity_start_0 ++ * Resource action: stateful-1 monitor on rhel8-2 ++ * Resource action: r192.168.122.207 monitor on rhel8-2 ++ * Resource action: petulant monitor on rhel8-2 ++ * Resource action: r192.168.122.208 monitor on rhel8-2 ++ * Resource action: lsb-dummy monitor on rhel8-2 ++ * Resource action: rsc_rhel8-1 start on rhel8-2 ++ * 
Resource action: rsc_rhel8-2 start on rhel8-2 ++ * Resource action: migrator start on rhel8-5 ++ * Resource action: migrator monitor=10000 on rhel8-5 ++ * Resource action: ping-1 start on rhel8-2 ++ * Pseudo action: Connectivity_running_0 ++ * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 ++ * Resource action: rsc_rhel8-2 monitor=5000 on rhel8-2 ++ * Resource action: ping-1 monitor=60000 on rhel8-2 ++Using the original execution date of: 2023-01-19 21:05:59Z ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * OFFLINE: [ rhel8-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-3 ++ * FencingPass (stonith:fence_dummy): Started rhel8-4 ++ * FencingFail (stonith:fence_dummy): Started rhel8-5 ++ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-2 ++ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-2 ++ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 ++ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] ++ * Clone Set: Connectivity [ping-1]: ++ * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 ] ++ * Clone Set: promotable-1 [stateful-1] (promotable): ++ * Promoted: [ rhel8-3 ] ++ * Unpromoted: [ rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Resource Group: group-1: ++ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 ++ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 +diff --git a/cts/scheduler/xml/migration-intermediary-cleaned.xml b/cts/scheduler/xml/migration-intermediary-cleaned.xml +new file mode 100644 +index 0000000..bec7888 +--- /dev/null ++++ b/cts/scheduler/xml/migration-intermediary-cleaned.xml +@@ -0,0 +1,513 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 1f9fadbb06baded3fc393cfe30a0cb620aca0829 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 17:12:13 -0600 +Subject: [PATCH 13/14] Fix: scheduler: handle cleaned migrate_from history + correctly + +Fixes T623 +--- + lib/pengine/unpack.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 14dc202..9c99183 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2990,6 +2990,15 @@ unpack_migrate_to_success(pe_resource_t 
*rsc, pe_node_t *node, xmlNode *xml_op) + + // The migrate_from is pending, complete but erased, or to be scheduled + ++ /* If there is no history at all for the resource on an online target, then ++ * it was likely cleaned. Just return, and we'll schedule a probe. Once we ++ * have the probe result, it will be reflected in target_newer_state. ++ */ ++ if ((target_node != NULL) && target_node->details->online ++ && unknown_on_node(rsc, target)) { ++ return; ++ } ++ + if (active_on_target) { + pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + +-- +2.31.1 + +From d9d1bf19e8522ea29c87f0c39b05828947bc5b0f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 15:48:01 -0600 +Subject: [PATCH 14/14] Test: scheduler: update expected output for migration + fix + +--- + .../dot/migration-intermediary-cleaned.dot | 8 -- + .../exp/migration-intermediary-cleaned.exp | 88 ++++--------------- + .../migration-intermediary-cleaned.scores | 2 +- + .../migration-intermediary-cleaned.summary | 9 +- + 4 files changed, 22 insertions(+), 85 deletions(-) + +diff --git a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot +index 09568d0..f6eabba 100644 +--- a/cts/scheduler/dot/migration-intermediary-cleaned.dot ++++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot +@@ -7,15 +7,7 @@ + "FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] + "migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] +-"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] +-"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] 
+-"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] +-"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] +-"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] + "petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] + "ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp +index 28fa776..8b9bb39 100644 +--- a/cts/scheduler/exp/migration-intermediary-cleaned.exp ++++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp +@@ -148,91 +148,41 @@ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- + + + +- ++ + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + + + +@@ -241,24 +191,24 @@ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + +@@ -268,7 +218,7 @@ + + + +- ++ + + + +@@ -277,7 +227,7 @@ + + + +- ++ + + + +@@ -286,7 +236,7 @@ + + + +- ++ + + + +@@ -295,7 +245,7 @@ + + + +- ++ + + + +@@ -304,7 +254,7 @@ + + + +- ++ + + + +diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores +index b3b8dff..09f05d1 100644 +--- a/cts/scheduler/scores/migration-intermediary-cleaned.scores ++++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores +@@ -103,7 +103,7 @@ pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 +-pcmk__primitive_assign: migrator allocation 
score on rhel8-5: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-5: 1 + pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 +diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary +index 5de1355..dd127a8 100644 +--- a/cts/scheduler/summary/migration-intermediary-cleaned.summary ++++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary +@@ -13,7 +13,7 @@ Current cluster status: + * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 + * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 + * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 +- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] ++ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 + * Clone Set: Connectivity [ping-1]: + * Started: [ rhel8-3 rhel8-4 rhel8-5 ] + * Stopped: [ rhel8-1 rhel8-2 ] +@@ -30,7 +30,6 @@ Current cluster status: + Transition Summary: + * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) + * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) +- * Restart migrator ( rhel8-5 ) + * Start ping-1:3 ( rhel8-2 ) + + Executing Cluster Transition: +@@ -44,8 +43,6 @@ Executing Cluster Transition: + * Resource action: rsc_rhel8-3 monitor on rhel8-2 + * Resource action: rsc_rhel8-4 monitor on rhel8-2 + * Resource action: rsc_rhel8-5 monitor on rhel8-2 +- * Resource action: migrator stop on rhel8-2 +- * Resource action: migrator stop on rhel8-5 + * Resource action: migrator monitor on rhel8-2 + * Resource action: ping-1 monitor on rhel8-2 + * Pseudo action: Connectivity_start_0 +@@ -56,8 +53,6 @@ Executing Cluster Transition: + * Resource action: lsb-dummy monitor on rhel8-2 + * Resource action: rsc_rhel8-1 start on rhel8-2 + * Resource action: rsc_rhel8-2 start on rhel8-2 +- * Resource action: migrator start on rhel8-5 +- * Resource 
action: migrator monitor=10000 on rhel8-5 + * Resource action: ping-1 start on rhel8-2 + * Pseudo action: Connectivity_running_0 + * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 +@@ -79,7 +74,7 @@ Revised Cluster Status: + * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 + * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 + * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 +- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] ++ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 + * Clone Set: Connectivity [ping-1]: + * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] + * Stopped: [ rhel8-1 ] +-- +2.31.1 + diff --git a/SOURCES/003-regression.patch b/SOURCES/003-regression.patch deleted file mode 100644 index 0185c2d..0000000 --- a/SOURCES/003-regression.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 9853f4d05a376062d60f2e4c90938e587992237b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 27 Jun 2022 12:06:24 -0400 -Subject: [PATCH 1/2] Fix: tools: Don't output "(null)" in crm_attribute's - quiet mode. - -If the attribute queried for has no value, simply do not output -anything. - -Regression in 2.1.3 introduced by 8c03553bbf - -Fixes T502 -See: rhbz#2099331 ---- - tools/crm_attribute.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c -index 0bd9dee81..b1463f906 100644 ---- a/tools/crm_attribute.c -+++ b/tools/crm_attribute.c -@@ -56,7 +56,9 @@ attribute_text(pcmk__output_t *out, va_list args) - char *host G_GNUC_UNUSED = va_arg(args, char *); - - if (out->quiet) { -- pcmk__formatted_printf(out, "%s\n", value); -+ if (value != NULL) { -+ pcmk__formatted_printf(out, "%s\n", value); -+ } - } else { - out->info(out, "%s%s %s%s %s%s value=%s", - scope ? "scope=" : "", scope ? 
scope : "", --- -2.31.1 - - -From 16d00a9b3ef27afd09f5c046ea1be50fc664ed84 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 27 Jun 2022 12:18:06 -0400 -Subject: [PATCH 2/2] Test: cts: Add a test for querying an attribute that does - not exist. - ---- - cts/cli/regression.tools.exp | 4 ++++ - cts/cts-cli.in | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp -index 0d1cfa2ab..464472d42 100644 ---- a/cts/cli/regression.tools.exp -+++ b/cts/cli/regression.tools.exp -@@ -24,6 +24,10 @@ A new shadow instance was created. To begin using it paste the following into y - - =#=#=#= End test: Validate CIB - OK (0) =#=#=#= - * Passed: cibadmin - Validate CIB -+=#=#=#= Begin test: Query the value of an attribute that does not exist =#=#=#= -+crm_attribute: Error performing operation: No such device or address -+=#=#=#= End test: Query the value of an attribute that does not exist - No such object (105) =#=#=#= -+* Passed: crm_attribute - Query the value of an attribute that does not exist - =#=#=#= Begin test: Configure something before erasing =#=#=#= - =#=#=#= Current cib after: Configure something before erasing =#=#=#= - -diff --git a/cts/cts-cli.in b/cts/cts-cli.in -index 8565c485a..b895d36ec 100755 ---- a/cts/cts-cli.in -+++ b/cts/cts-cli.in -@@ -511,6 +511,10 @@ function test_tools() { - cmd="cibadmin -Q" - test_assert $CRM_EX_OK - -+ desc="Query the value of an attribute that does not exist" -+ cmd="crm_attribute -n ABCD --query --quiet" -+ test_assert $CRM_EX_NOSUCH 0 -+ - desc="Configure something before erasing" - cmd="crm_attribute -n cluster-delay -v 60s" - test_assert $CRM_EX_OK -@@ -1980,6 +1984,7 @@ for t in $tests; do - -e 's/ end=\"[0-9][-+: 0-9]*Z*\"/ end=\"\"/' \ - -e 's/ start=\"[0-9][-+: 0-9]*Z*\"/ start=\"\"/' \ - -e 's/^Error checking rule: Device not configured/Error checking rule: No such device or address/' \ -+ -e 's/Error performing operation: Device not configured/Error 
performing operation: No such device or address/' \ - -e 's/\(Injecting attribute last-failure-ping#monitor_10000=\)[0-9]*/\1/' \ - -e 's/^lt-//' \ - -e 's/ocf::/ocf:/' \ --- -2.31.1 - diff --git a/SOURCES/004-g_source_remove.patch b/SOURCES/004-g_source_remove.patch new file mode 100644 index 0000000..2af0f47 --- /dev/null +++ b/SOURCES/004-g_source_remove.patch @@ -0,0 +1,107 @@ +From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 3 Feb 2023 12:08:57 -0800 +Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one + +QE observed a rarely reproducible core dump in the fencer during +Pacemaker shutdown, in which we try to g_source_remove() an op timer +that's already been removed. + +free_stonith_remote_op_list() +-> g_hash_table_destroy() +-> g_hash_table_remove_all_nodes() +-> clear_remote_op_timers() +-> g_source_remove() +-> crm_glib_handler() +-> "Source ID 190 was not found when attempting to remove it" + +The likely cause is that request_peer_fencing() doesn't set +op->op_timer_one to 0 after calling g_source_remove() on it, so if that +op is still in the stonith_remote_op_list at shutdown with the same +timer, clear_remote_op_timers() tries to remove the source for +op_timer_one again. + +There are only five locations that call g_source_remove() on a +remote_fencing_op_t timer. +* Three of them are in clear_remote_op_timers(), which first 0-checks + the timer and then sets it to 0 after g_source_remove(). +* One is in remote_op_query_timeout(), which does the same. +* The last is the one we fix here in request_peer_fencing(). + +I don't know all the conditions of QE's test scenario at this point. 
+What I do know: +* have-watchdog=true +* stonith-watchdog-timeout=10 +* no explicit topology +* fence agent script is missing for the configured fence device +* requested fencing of one node +* cluster shutdown + +Fixes RHBZ2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index d61b5bd..b7426ff 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->state = st_exec; + if (op->op_timer_one) { + g_source_remove(op->op_timer_one); ++ op->op_timer_one = 0; + } + + if (!((stonith_watchdog_timeout_ms > 0) +-- +2.31.1 + +From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 15 Feb 2023 22:30:27 -0800 +Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total + +remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove +the source from the main loop after returning. Currently this function +is used as the callback only when creating op->op_timer_total. + +If we don't set op->op_timer_total to 0 before returning from +remote_op_timeout(), then we can get an assertion and core dump from +GLib when the op's timers are being cleared (either during op +finalization or during fencer shutdown). This is because +clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to +remove the source, but the source has already been removed. + +Note that we're already (correctly) zeroing op->op_timer_one and +op->query_timeout as appropriate in their respective callback functions. + +Fortunately, GLib doesn't care whether the source has already been +removed before we return G_SOURCE_REMOVE from a callback. So it's safe +to call finalize_op() (which removes all the op's timer sources) from +within a callback. 
+ +Fixes RHBZ#2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index b7426ff88..adea3d7d8 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata) + { + remote_fencing_op_t *op = userdata; + ++ op->op_timer_total = 0; ++ + if (op->state == st_done) { + crm_debug("Action '%s' targeting %s for client %s already completed " + CRM_XS " id=%.8s", +-- +2.39.0 diff --git a/SOURCES/004-schema.patch b/SOURCES/004-schema.patch deleted file mode 100644 index 2632a9d..0000000 --- a/SOURCES/004-schema.patch +++ /dev/null @@ -1,624 +0,0 @@ -From e8f96dec79bb33c11d39c9037ac623f18a67b539 Mon Sep 17 00:00:00 2001 -From: Petr Pavlu -Date: Tue, 24 May 2022 18:08:57 +0200 -Subject: [PATCH] Low: schemas: copy API schemas in preparation for changes - -Copy crm_mon, crm_simulate and nodes API schemas in preparation for -changes and bump the external reference version in crm_mon and -crm_simulate to point to the new nodes schema. ---- - include/crm/common/output_internal.h | 2 +- - xml/api/crm_mon-2.21.rng | 183 +++++++++++++++ - xml/api/crm_simulate-2.21.rng | 338 +++++++++++++++++++++++++++ - xml/api/nodes-2.21.rng | 51 ++++ - 4 files changed, 573 insertions(+), 1 deletion(-) - create mode 100644 xml/api/crm_mon-2.21.rng - create mode 100644 xml/api/crm_simulate-2.21.rng - create mode 100644 xml/api/nodes-2.21.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 577fd5247..74ee833c1 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.20" -+# define PCMK__API_VERSION "2.21" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) -diff --git a/xml/api/crm_mon-2.21.rng b/xml/api/crm_mon-2.21.rng -new file mode 100644 -index 000000000..37036d665 ---- /dev/null -+++ b/xml/api/crm_mon-2.21.rng -@@ -0,0 +1,183 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ granted -+ revoked -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/crm_simulate-2.21.rng b/xml/api/crm_simulate-2.21.rng -new file mode 100644 -index 000000000..75a9b399b ---- /dev/null -+++ b/xml/api/crm_simulate-2.21.rng -@@ -0,0 +1,338 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git 
a/xml/api/nodes-2.21.rng b/xml/api/nodes-2.21.rng -new file mode 100644 -index 000000000..df4c77f37 ---- /dev/null -+++ b/xml/api/nodes-2.21.rng -@@ -0,0 +1,51 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ red -+ yellow -+ green -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ unknown -+ member -+ remote -+ ping -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - diff --git a/SOURCES/005-query-null.patch b/SOURCES/005-query-null.patch new file mode 100644 index 0000000..194cd33 --- /dev/null +++ b/SOURCES/005-query-null.patch @@ -0,0 +1,151 @@ +From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 14 Feb 2023 14:00:37 -0500 +Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in + pcmk__attrd_api_query. + +According to the header file, if node is NULL, pcmk__attrd_api_query +should query the value of the given attribute on all cluster nodes. +This is also what the server expects and how attrd_updater is supposed +to work. + +However, pcmk__attrd_api_query has no way of letting callers decide +whether they want to query all nodes or whether they want to use the +local node. We were passing NULL for the node name, which it took to +mean it should look up the local node name. This calls +pcmk__node_attr_target, which probes the local cluster name and returns +that to pcmk__attrd_api_query. If it returns non-NULL, that value will +then be put into the XML IPC call which means the server will only +return the value for that node. + +In testing this was usually fine. However, in pratice, the methods +pcmk__node_attr_target uses to figure out the local cluster node name +involves checking the OCF_RESKEY_CRM_meta_on_node environment variable +among others. + +This variable was never set in testing, but can be set in the real +world. This leads to circumstances where the user did "attrd_updater -QA" +expecting to get the values on all nodes, but instead only got the value +on the local cluster node. 
+ +In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never +called if the node was NULL but was called otherwise. + +The fix is to modify pcmk__attrd_api_query to take an option for +querying all nodes. If that's present, we'll query all nodes. If it's +not present, we'll look at the given node name - NULL means look it up, +anything else means just that node. + +Regression in 2.1.5 introduced by eb20a65577 +--- + include/crm/common/attrd_internal.h | 6 +++++- + include/crm/common/ipc_attrd_internal.h | 7 +++++-- + lib/common/ipc_attrd.c | 12 ++++++++---- + tools/attrd_updater.c | 5 +++-- + 4 files changed, 21 insertions(+), 9 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index 389be48..7337c38 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2022 the Pacemaker project contributors ++ * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts { + pcmk__node_attr_perm = (1 << 5), + pcmk__node_attr_sync_local = (1 << 6), + pcmk__node_attr_sync_cluster = (1 << 7), ++ // pcmk__node_attr_utilization is 8, but that has not been backported. ++ // I'm leaving the gap here in case we backport that in the future and ++ // also to avoid problems on mixed-version clusters. 
++ pcmk__node_attr_query_all = (1 << 9), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h +index 2c6713f..b1b7584 100644 +--- a/include/crm/common/ipc_attrd_internal.h ++++ b/include/crm/common/ipc_attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2022 the Pacemaker project contributors ++ * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); + * + * \param[in,out] api Connection to pacemaker-attrd + * \param[in] node Look up the attribute for this node +- * (or NULL for all nodes) ++ * (or NULL for the local node) + * \param[in] name Attribute name + * \param[in] options Bitmask of pcmk__node_attr_opts + * ++ * \note Passing pcmk__node_attr_query_all will cause the function to query ++ * the value of \p name on all nodes, regardless of the value of \p node. ++ * + * \return Standard Pacemaker return code + */ + int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 4606509..dece49b 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2011-2022 the Pacemaker project contributors ++ * Copyright 2011-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + return EINVAL; + } + +- target = pcmk__node_attr_target(node); ++ if (pcmk_is_set(options, pcmk__node_attr_query_all)) { ++ node = NULL; ++ } else { ++ target = pcmk__node_attr_target(node); + +- if (target != NULL) { +- node = target; ++ if (target != NULL) { ++ node = target; ++ } + } + + request = create_attrd_op(NULL); +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index 3cd766d..cbd341d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, + static int + send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) + { ++ uint32_t options = pcmk__node_attr_none; + pcmk_ipc_api_t *attrd_api = NULL; + int rc = pcmk_rc_ok; + +@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no + + /* Decide which node(s) to query */ + if (query_all == TRUE) { +- attr_node = NULL; ++ options |= pcmk__node_attr_query_all; + } + +- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0); ++ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); + + if (rc != pcmk_rc_ok) { + g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", +-- +2.31.1 + diff --git a/SOURCES/005-schema.patch b/SOURCES/005-schema.patch deleted file mode 100644 index 57f6309..0000000 --- a/SOURCES/005-schema.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 5b6280ac1a213e176aee6d61945b3283ea060a88 Mon Sep 17 00:00:00 2001 -From: Petr Pavlu -Date: Tue, 24 May 2022 18:02:31 +0200 -Subject: [PATCH] Feature: tools: report CRM feature set of nodes by crm_mon - -Enable crm_mon to report when CRM feature set is not consistent among -online nodes and output a version of each node if --show-detail is -specified. 
---- - xml/api/crm_mon-2.21.rng | 3 + - xml/api/nodes-2.21.rng | 3 + - 9 files changed, 508 insertions(+), 125 deletions(-) - create mode 100644 cts/cli/crm_mon-feature_set.xml - create mode 100644 cts/cli/regression.feature_set.exp - -diff --git a/xml/api/crm_mon-2.21.rng b/xml/api/crm_mon-2.21.rng -index 37036d665..e99bcc3d7 100644 ---- a/xml/api/crm_mon-2.21.rng -+++ b/xml/api/crm_mon-2.21.rng -@@ -54,6 +54,9 @@ - - - -+ -+ -+ - - - -diff --git a/xml/api/nodes-2.21.rng b/xml/api/nodes-2.21.rng -index df4c77f37..7e236ba63 100644 ---- a/xml/api/nodes-2.21.rng -+++ b/xml/api/nodes-2.21.rng -@@ -25,6 +25,9 @@ - - - -+ -+ -+ - - - --- -2.31.1 - diff --git a/SOURCES/006-crm_resource.patch b/SOURCES/006-crm_resource.patch deleted file mode 100644 index 577264b..0000000 --- a/SOURCES/006-crm_resource.patch +++ /dev/null @@ -1,1686 +0,0 @@ -From a467f0953c61bd56a9b34a98c71855d3cfbf6ba4 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 16:26:30 -0500 -Subject: [PATCH 01/14] Refactor: tools: use a flag to indicate locked - resources in crm_resource - -... to make the handling consistent with other checks. This also allows some of -the code to be simplified. 
---- - tools/crm_resource.h | 13 +++++++++---- - tools/crm_resource_print.c | 21 ++++++++------------- - tools/crm_resource_runtime.c | 7 +++---- - 3 files changed, 20 insertions(+), 21 deletions(-) - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index 71a978893..b5fdd1bb5 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -8,6 +8,10 @@ - */ - - #include -+ -+#include -+#include -+ - #include - - #include -@@ -31,13 +35,14 @@ typedef struct node_info_s { - enum resource_check_flags { - rsc_remain_stopped = (1 << 0), - rsc_unpromotable = (1 << 1), -- rsc_unmanaged = (1 << 2) -+ rsc_unmanaged = (1 << 2), -+ rsc_locked = (1 << 3), - }; - - typedef struct resource_checks_s { -- pe_resource_t *rsc; -- unsigned int flags; -- const char *lock_node; -+ pe_resource_t *rsc; // Resource being checked -+ uint32_t flags; // Group of enum resource_check_flags -+ const char *lock_node; // Node that resource is shutdown-locked to, if any - } resource_checks_t; - - resource_checks_t *cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed); -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 5abf3df0c..f63fc952d 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -450,14 +450,13 @@ resource_check_list_default(pcmk__output_t *out, va_list args) { - resource_checks_t *checks = va_arg(args, resource_checks_t *); - - pe_resource_t *parent = uber_parent(checks->rsc); -- int rc = pcmk_rc_no_output; -- bool printed = false; - -- if (checks->flags != 0 || checks->lock_node != NULL) { -- printed = true; -- out->begin_list(out, NULL, NULL, "Resource Checks"); -+ if (checks->flags == 0) { -+ return pcmk_rc_no_output; - } - -+ out->begin_list(out, NULL, NULL, "Resource Checks"); -+ - if (pcmk_is_set(checks->flags, rsc_remain_stopped)) { - out->list_item(out, "check", "Configuration specifies '%s' should remain stopped", - parent->id); -@@ -473,17 +472,13 @@ resource_check_list_default(pcmk__output_t 
*out, va_list args) { - parent->id); - } - -- if (checks->lock_node) { -+ if (pcmk_is_set(checks->flags, rsc_locked)) { - out->list_item(out, "check", "'%s' is locked to node %s due to shutdown", - parent->id, checks->lock_node); - } - -- if (printed) { -- out->end_list(out); -- rc = pcmk_rc_ok; -- } -- -- return rc; -+ out->end_list(out); -+ return pcmk_rc_ok; - } - - PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *") -@@ -509,7 +504,7 @@ resource_check_list_xml(pcmk__output_t *out, va_list args) { - pcmk__xe_set_bool_attr(node, "unmanaged", true); - } - -- if (checks->lock_node) { -+ if (pcmk_is_set(checks->flags, rsc_locked)) { - crm_xml_add(node, "locked-to", checks->lock_node); - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 9e7e1fe74..b5bccadaf 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -36,7 +36,8 @@ cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed) - rc->flags |= rsc_unmanaged; - } - -- if (rsc->lock_node) { -+ if (rsc->lock_node != NULL) { -+ rc->flags |= rsc_locked; - rc->lock_node = rsc->lock_node->details->uname; - } - -@@ -914,9 +915,7 @@ cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) - - checks = cli_check_resource(rsc, role_s, managed); - -- if (checks->flags != 0 || checks->lock_node != NULL) { -- rc = out->message(out, "resource-check-list", checks); -- } -+ rc = out->message(out, "resource-check-list", checks); - - free(role_s); - free(managed); --- -2.31.1 - - -From 7f8f94d0a1086e592e39f3a1a812b1a65225c09b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 16:48:03 -0500 -Subject: [PATCH 02/14] Refactor: tools: functionize individual resource checks - in crm_resource - -... rather than have one check-everything function, to make the code simpler -and more readable. 
---- - tools/crm_resource_runtime.c | 101 ++++++++++++++++++++--------------- - 1 file changed, 57 insertions(+), 44 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index b5bccadaf..d47f959f5 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -15,36 +15,6 @@ - #include - #include - --resource_checks_t * --cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed) --{ -- pe_resource_t *parent = uber_parent(rsc); -- resource_checks_t *rc = calloc(1, sizeof(resource_checks_t)); -- -- if (role_s) { -- enum rsc_role_e role = text2role(role_s); -- -- if (role == RSC_ROLE_STOPPED) { -- rc->flags |= rsc_remain_stopped; -- } else if (pcmk_is_set(parent->flags, pe_rsc_promotable) && -- (role == RSC_ROLE_UNPROMOTED)) { -- rc->flags |= rsc_unpromotable; -- } -- } -- -- if (managed && !crm_is_true(managed)) { -- rc->flags |= rsc_unmanaged; -- } -- -- if (rsc->lock_node != NULL) { -- rc->flags |= rsc_locked; -- rc->lock_node = rsc->lock_node->details->uname; -- } -- -- rc->rsc = rsc; -- return rc; --} -- - static GList * - build_node_info_list(pe_resource_t *rsc) - { -@@ -898,29 +868,72 @@ cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, - return rc; - } - --int --cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) -+static void -+check_role(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) - { - char *role_s = NULL; -- char *managed = NULL; -- pe_resource_t *parent = uber_parent(rsc); -- int rc = pcmk_rc_no_output; -- resource_checks_t *checks = NULL; -- -- find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -- NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); -+ pe_resource_t *parent = uber_parent(checks->rsc); - - find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, - NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); -+ if (role_s == NULL) { -+ return; -+ } - -- checks = cli_check_resource(rsc, 
role_s, managed); -+ switch (text2role(role_s)) { -+ case RSC_ROLE_STOPPED: -+ checks->flags |= rsc_remain_stopped; -+ break; - -- rc = out->message(out, "resource-check-list", checks); -+ case RSC_ROLE_UNPROMOTED: -+ if (pcmk_is_set(parent->flags, pe_rsc_promotable)) { -+ checks->flags |= rsc_unpromotable; -+ } -+ break; - -+ default: -+ break; -+ } - free(role_s); -- free(managed); -- free(checks); -- return rc; -+} -+ -+static void -+check_managed(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) -+{ -+ char *managed_s = NULL; -+ pe_resource_t *parent = uber_parent(checks->rsc); -+ -+ find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -+ NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed_s); -+ if (managed_s == NULL) { -+ return; -+ } -+ -+ if (!crm_is_true(managed_s)) { -+ checks->flags |= rsc_unmanaged; -+ } -+ free(managed_s); -+} -+ -+static void -+check_locked(resource_checks_t *checks) -+{ -+ if (checks->rsc->lock_node != NULL) { -+ checks->flags |= rsc_locked; -+ checks->lock_node = checks->rsc->lock_node->details->uname; -+ } -+} -+ -+int -+cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) -+{ -+ resource_checks_t checks = { .rsc = rsc }; -+ -+ check_role(out, cib_conn, &checks); -+ check_managed(out, cib_conn, &checks); -+ check_locked(&checks); -+ -+ return out->message(out, "resource-check-list", &checks); - } - - // \return Standard Pacemaker return code --- -2.31.1 - - -From 32414475281d909cd808f723a41d88a5e0d2b254 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 17:11:07 -0500 -Subject: [PATCH 03/14] Fix: tools: crm_resource target-role check should use - meta-attribute table - -Previously, check_role() searched the CIB for the uber-parent's target-role -attribute. That could give incorrect results if target-role was set on a -different resource in the ancestry chain (e.g. 
the resource itself for a group -member, or the group for a cloned group), or if there were multiple target-role -settings (e.g. using rules). - -Now, target-role is checked in rsc->meta, which should be fully evaluated for -inheritance and rules. ---- - tools/crm_resource_runtime.c | 15 ++++++--------- - 1 file changed, 6 insertions(+), 9 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index d47f959f5..e9d05cb77 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -869,24 +869,22 @@ cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, - } - - static void --check_role(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) -+check_role(resource_checks_t *checks) - { -- char *role_s = NULL; -- pe_resource_t *parent = uber_parent(checks->rsc); -+ const char *role_s = g_hash_table_lookup(checks->rsc->meta, -+ XML_RSC_ATTR_TARGET_ROLE); - -- find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -- NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); - if (role_s == NULL) { - return; - } -- - switch (text2role(role_s)) { - case RSC_ROLE_STOPPED: - checks->flags |= rsc_remain_stopped; - break; - - case RSC_ROLE_UNPROMOTED: -- if (pcmk_is_set(parent->flags, pe_rsc_promotable)) { -+ if (pcmk_is_set(uber_parent(checks->rsc)->flags, -+ pe_rsc_promotable)) { - checks->flags |= rsc_unpromotable; - } - break; -@@ -894,7 +892,6 @@ check_role(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) - default: - break; - } -- free(role_s); - } - - static void -@@ -929,7 +926,7 @@ cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) - { - resource_checks_t checks = { .rsc = rsc }; - -- check_role(out, cib_conn, &checks); -+ check_role(&checks); - check_managed(out, cib_conn, &checks); - check_locked(&checks); - --- -2.31.1 - - -From 0fd133680f7b2c25a946cf3fb25f4ee9ffeeaf93 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 
17:15:43 -0500 -Subject: [PATCH 04/14] Fix: tools: crm_resource is-managed check should use - meta-attribute table - -Previously, check_managed() searched the CIB for the uber-parent's is-managed -attribute. That could give incorrect results if is-managed was set on a -different resource in the ancestry chain (e.g. the resource itself for a group -member, or the group for a cloned group), or if there were multiple is-managed -settings (e.g. using rules). - -Now, is-managed is checked in rsc->meta, which should be fully evaluated for -inheritance and rules. ---- - tools/crm_resource_runtime.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index e9d05cb77..4f62b4b2e 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -895,21 +895,14 @@ check_role(resource_checks_t *checks) - } - - static void --check_managed(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) -+check_managed(resource_checks_t *checks) - { -- char *managed_s = NULL; -- pe_resource_t *parent = uber_parent(checks->rsc); -+ const char *managed_s = g_hash_table_lookup(checks->rsc->meta, -+ XML_RSC_ATTR_MANAGED); - -- find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -- NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed_s); -- if (managed_s == NULL) { -- return; -- } -- -- if (!crm_is_true(managed_s)) { -+ if ((managed_s != NULL) && !crm_is_true(managed_s)) { - checks->flags |= rsc_unmanaged; - } -- free(managed_s); - } - - static void -@@ -927,7 +920,7 @@ cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) - resource_checks_t checks = { .rsc = rsc }; - - check_role(&checks); -- check_managed(out, cib_conn, &checks); -+ check_managed(&checks); - check_locked(&checks); - - return out->message(out, "resource-check-list", &checks); --- -2.31.1 - - -From e9523c1b238492c8cf8b453ba6710f13bf81cd28 Mon Sep 17 00:00:00 2001 
-From: Ken Gaillot -Date: Tue, 5 Apr 2022 17:18:44 -0500 -Subject: [PATCH 05/14] Refactor: tools: drop unused argument from - cli_resource_check() - ---- - tools/crm_resource.c | 4 ++-- - tools/crm_resource.h | 2 +- - tools/crm_resource_print.c | 24 ++++++++++++------------ - tools/crm_resource_runtime.c | 2 +- - 4 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 883563df9..bf5326b40 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1019,7 +1019,7 @@ cleanup(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - if (rc == pcmk_rc_ok) { -@@ -1326,7 +1326,7 @@ refresh_resource(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - if (rc == pcmk_rc_ok) { -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index b5fdd1bb5..bcff2b5f6 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -68,7 +68,7 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, - bool active, pe_working_set_t * data_set); - - /* runtime */ --int cli_resource_check(pcmk__output_t *out, cib_t * cib, pe_resource_t *rsc); -+int cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc); - int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, - const char *rsc_id, pe_working_set_t *data_set); - GList *cli_resource_search(pe_resource_t *rsc, const char *requested_name, -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index f63fc952d..f025cbddd 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -587,7 +587,7 @@ PCMK__OUTPUT_ARGS("resource-reasons-list", 
"cib_t *", "GList *", "pe_resource_t - static int - resource_reasons_list_default(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn = va_arg(args, cib_t *); -+ cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); -@@ -610,7 +610,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - out->list_item(out, "reason", "Resource %s is running", rsc->id); - } - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - g_list_free(hosts); - hosts = NULL; - } -@@ -624,7 +624,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->id, host_uname); - } - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* host_uname = node->details->uname; -@@ -637,14 +637,14 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is running on host %s", - rsc->id, host_uname); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is assigned to host %s but not running", - rsc->id, host_uname); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - g_list_free(allResources); -@@ -657,7 +657,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->fns->location(rsc, &hosts, TRUE); - out->list_item(out, "reason", "Resource %s is %srunning", - rsc->id, (hosts? 
"" : "not ")); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - g_list_free(hosts); - } - -@@ -670,7 +670,7 @@ PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t - static int - resource_reasons_list_xml(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn = va_arg(args, cib_t *); -+ cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); -@@ -695,7 +695,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "running", pcmk__btoa(hosts != NULL), - NULL); - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - pcmk__output_xml_pop_parent(out); - g_list_free(hosts); - hosts = NULL; -@@ -708,7 +708,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - crm_xml_add(xml_node, "running_on", host_uname); - } - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* host_uname = node->details->uname; -@@ -728,7 +728,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - pcmk__output_xml_pop_parent(out); - } - -@@ -741,7 +741,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - pcmk__output_xml_pop_parent(out); - } - -@@ -755,7 +755,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - - rsc->fns->location(rsc, &hosts, TRUE); - crm_xml_add(xml_node, "running", pcmk__btoa(hosts != NULL)); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - g_list_free(hosts); - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 
4f62b4b2e..47653a060 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -915,7 +915,7 @@ check_locked(resource_checks_t *checks) - } - - int --cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) -+cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc) - { - resource_checks_t checks = { .rsc = rsc }; - --- -2.31.1 - - -From b1a1a07f3e44bc74575eab325277ea8c1f3391b2 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 17:20:06 -0500 -Subject: [PATCH 06/14] Refactor: tools: drop unused argument from - resource-reasons-list message - ---- - tools/crm_resource.c | 2 +- - tools/crm_resource_print.c | 6 ++---- - 2 files changed, 3 insertions(+), 5 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index bf5326b40..7f656a20d 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1941,7 +1941,7 @@ main(int argc, char **argv) - if ((options.host_uname != NULL) && (node == NULL)) { - rc = pcmk_rc_node_unknown; - } else { -- rc = out->message(out, "resource-reasons-list", cib_conn, -+ rc = out->message(out, "resource-reasons-list", - data_set->resources, rsc, node); - } - break; -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index f025cbddd..580f9c71a 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -582,12 +582,11 @@ resource_search_list_xml(pcmk__output_t *out, va_list args) - return pcmk_rc_ok; - } - --PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t *", -+PCMK__OUTPUT_ARGS("resource-reasons-list", "GList *", "pe_resource_t *", - "pe_node_t *") - static int - resource_reasons_list_default(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); -@@ -665,12 +664,11 @@ 
resource_reasons_list_default(pcmk__output_t *out, va_list args) - return pcmk_rc_ok; - } - --PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t *", -+PCMK__OUTPUT_ARGS("resource-reasons-list", "GList *", "pe_resource_t *", - "pe_node_t *") - static int - resource_reasons_list_xml(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); --- -2.31.1 - - -From 973eb2694b334b4e9e6967f6c7ceaebec10693db Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 10:08:37 -0500 -Subject: [PATCH 07/14] Refactor: tools: pass node to cli_resource_check() - -The node is not used yet ---- - tools/crm_resource.c | 12 ++++++------ - tools/crm_resource.h | 3 ++- - tools/crm_resource_print.c | 20 ++++++++++---------- - tools/crm_resource_runtime.c | 2 +- - 4 files changed, 19 insertions(+), 18 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 7f656a20d..756a06268 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1004,7 +1004,7 @@ ban_or_move(pcmk__output_t *out, pe_resource_t *rsc, const char *move_lifetime) - } - - static void --cleanup(pcmk__output_t *out, pe_resource_t *rsc) -+cleanup(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { - int rc = pcmk_rc_ok; - -@@ -1019,7 +1019,7 @@ cleanup(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - if (rc == pcmk_rc_ok) { -@@ -1311,7 +1311,7 @@ refresh(pcmk__output_t *out) - } - - static void --refresh_resource(pcmk__output_t *out, pe_resource_t *rsc) -+refresh_resource(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { - int rc = pcmk_rc_ok; - -@@ -1326,7 +1326,7 @@ 
refresh_resource(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - if (rc == pcmk_rc_ok) { -@@ -2075,7 +2075,7 @@ main(int argc, char **argv) - start_mainloop(controld_api); - } - } else { -- cleanup(out, rsc); -+ cleanup(out, rsc, node); - } - break; - -@@ -2083,7 +2083,7 @@ main(int argc, char **argv) - if (rsc == NULL) { - rc = refresh(out); - } else { -- refresh_resource(out, rsc); -+ refresh_resource(out, rsc, node); - } - break; - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index bcff2b5f6..f7e44476d 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -68,7 +68,8 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, - bool active, pe_working_set_t * data_set); - - /* runtime */ --int cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc); -+int cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, -+ pe_node_t *node); - int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, - const char *rsc_id, pe_working_set_t *data_set); - GList *cli_resource_search(pe_resource_t *rsc, const char *requested_name, -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 580f9c71a..087819601 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -609,7 +609,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - out->list_item(out, "reason", "Resource %s is running", rsc->id); - } - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - g_list_free(hosts); - hosts = NULL; - } -@@ -623,7 +623,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->id, host_uname); - } - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* 
host_uname = node->details->uname; -@@ -636,14 +636,14 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is running on host %s", - rsc->id, host_uname); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is assigned to host %s but not running", - rsc->id, host_uname); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - g_list_free(allResources); -@@ -656,7 +656,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->fns->location(rsc, &hosts, TRUE); - out->list_item(out, "reason", "Resource %s is %srunning", - rsc->id, (hosts? "" : "not ")); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - g_list_free(hosts); - } - -@@ -693,7 +693,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "running", pcmk__btoa(hosts != NULL), - NULL); - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - pcmk__output_xml_pop_parent(out); - g_list_free(hosts); - hosts = NULL; -@@ -706,7 +706,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - crm_xml_add(xml_node, "running_on", host_uname); - } - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* host_uname = node->details->uname; -@@ -726,7 +726,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - pcmk__output_xml_pop_parent(out); - } - -@@ -739,7 +739,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, 
rsc, node); - pcmk__output_xml_pop_parent(out); - } - -@@ -753,7 +753,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - - rsc->fns->location(rsc, &hosts, TRUE); - crm_xml_add(xml_node, "running", pcmk__btoa(hosts != NULL)); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - g_list_free(hosts); - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 47653a060..68e899c45 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -915,7 +915,7 @@ check_locked(resource_checks_t *checks) - } - - int --cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc) -+cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { - resource_checks_t checks = { .rsc = rsc }; - --- -2.31.1 - - -From c3bfde0536f2eb51c81bf34fa957c38dc88f9cc3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 09:49:03 -0500 -Subject: [PATCH 08/14] Feature: tools: crm_resource --why now checks node - health status - -Closes T65 ---- - tools/crm_resource.h | 1 + - tools/crm_resource_print.c | 13 +++++++++ - tools/crm_resource_runtime.c | 56 ++++++++++++++++++++++++++++++++++++ - 3 files changed, 70 insertions(+) - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index f7e44476d..ae4b02a98 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -37,6 +37,7 @@ enum resource_check_flags { - rsc_unpromotable = (1 << 1), - rsc_unmanaged = (1 << 2), - rsc_locked = (1 << 3), -+ rsc_node_health = (1 << 4), - }; - - typedef struct resource_checks_s { -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 087819601..27fd76aaf 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -477,6 +477,15 @@ resource_check_list_default(pcmk__output_t *out, va_list args) { - parent->id, checks->lock_node); - } - -+ if (pcmk_is_set(checks->flags, rsc_node_health)) { -+ out->list_item(out, "check", -+ "'%s' cannot run on 
unhealthy nodes due to " -+ PCMK__OPT_NODE_HEALTH_STRATEGY "='%s'", -+ parent->id, -+ pe_pref(checks->rsc->cluster->config_hash, -+ PCMK__OPT_NODE_HEALTH_STRATEGY)); -+ } -+ - out->end_list(out); - return pcmk_rc_ok; - } -@@ -508,6 +517,10 @@ resource_check_list_xml(pcmk__output_t *out, va_list args) { - crm_xml_add(node, "locked-to", checks->lock_node); - } - -+ if (pcmk_is_set(checks->flags, rsc_node_health)) { -+ pcmk__xe_set_bool_attr(node, "unhealthy", true); -+ } -+ - return pcmk_rc_ok; - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 68e899c45..2aa3efe38 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -914,6 +914,61 @@ check_locked(resource_checks_t *checks) - } - } - -+static bool -+node_is_unhealthy(pe_node_t *node) -+{ -+ switch (pe__health_strategy(node->details->data_set)) { -+ case pcmk__health_strategy_none: -+ break; -+ -+ case pcmk__health_strategy_no_red: -+ if (pe__node_health(node) < 0) { -+ return true; -+ } -+ break; -+ -+ case pcmk__health_strategy_only_green: -+ if (pe__node_health(node) <= 0) { -+ return true; -+ } -+ break; -+ -+ case pcmk__health_strategy_progressive: -+ case pcmk__health_strategy_custom: -+ /* @TODO These are finite scores, possibly with rules, and possibly -+ * combining with other scores, so attributing these as a cause is -+ * nontrivial. 
-+ */ -+ break; -+ } -+ return false; -+} -+ -+static void -+check_node_health(resource_checks_t *checks, pe_node_t *node) -+{ -+ if (node == NULL) { -+ GHashTableIter iter; -+ bool allowed = false; -+ bool all_nodes_unhealthy = true; -+ -+ g_hash_table_iter_init(&iter, checks->rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { -+ allowed = true; -+ if (!node_is_unhealthy(node)) { -+ all_nodes_unhealthy = false; -+ break; -+ } -+ } -+ if (allowed && all_nodes_unhealthy) { -+ checks->flags |= rsc_node_health; -+ } -+ -+ } else if (node_is_unhealthy(node)) { -+ checks->flags |= rsc_node_health; -+ } -+} -+ - int - cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { -@@ -922,6 +977,7 @@ cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - check_role(&checks); - check_managed(&checks); - check_locked(&checks); -+ check_node_health(&checks, node); - - return out->message(out, "resource-check-list", &checks); - } --- -2.31.1 - - -From 48730fd51a22e109514764a039e5c89fd204ad4c Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 10:41:48 -0500 -Subject: [PATCH 09/14] Low: schemas: copy crm_resource API schema in - preparation for changes - ---- - include/crm/common/output_internal.h | 2 +- - xml/api/crm_resource-2.22.rng | 303 +++++++++++++++++++++++++++ - 2 files changed, 304 insertions(+), 1 deletion(-) - create mode 100644 xml/api/crm_resource-2.22.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index ca16227fe..bdcae8ad6 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.21" -+# define PCMK__API_VERSION "2.22" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -new file mode 100644 -index 000000000..cd74da0d8 ---- /dev/null -+++ b/xml/api/crm_resource-2.22.rng -@@ -0,0 +1,303 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ promoted -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ocf -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ true -+ false -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ Stopped -+ Started -+ Promoted -+ Unpromoted -+ -+ -+ Master -+ Slave -+ -+ -+ --- -2.31.1 - - -From 75a885d9da92c84038e3abf732c11cf3fb6a79a7 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:33:50 -0500 -Subject: [PATCH 10/14] Fix: tools: correct crm_resource --why schema to match - actual output - -If both a resource and node name are specified, "running_on" is optional ---- - xml/api/crm_resource-2.22.rng | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -index cd74da0d8..e89d850da 100644 ---- a/xml/api/crm_resource-2.22.rng -+++ b/xml/api/crm_resource-2.22.rng -@@ -126,7 +126,9 @@ - - - -- -+ -+ -+ - - - --- -2.31.1 - - -From 5e4f993859dd68a3f88cb0648ace7b3837316288 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:20:03 -0500 
-Subject: [PATCH 11/14] Low: schemas: simplify crm_resource --why schema - ---- - xml/api/crm_resource-2.22.rng | 64 ++++++++++++----------------------- - 1 file changed, 22 insertions(+), 42 deletions(-) - -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -index e89d850da..2d2ba839f 100644 ---- a/xml/api/crm_resource-2.22.rng -+++ b/xml/api/crm_resource-2.22.rng -@@ -102,56 +102,36 @@ - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- - -+ - - - -- -- -- - -- -- -- -- -- -- -- -- -- -- -- -- -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -- -- -- -- -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --- -2.31.1 - - -From 79bdbbde27ad340c2054089aaecf5e0b49296e59 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:28:11 -0500 -Subject: [PATCH 12/14] Test: cts-cli: use validated XML output for - crm_resource --why test - ---- - cts/cli/regression.tools.exp | 8 ++++++-- - cts/cts-cli.in | 4 ++-- - 2 files changed, 8 insertions(+), 4 deletions(-) - -diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp -index 0d1cfa2ab..4237a3ec5 100644 ---- a/cts/cli/regression.tools.exp -+++ b/cts/cli/regression.tools.exp -@@ -888,8 +888,12 @@ Deleted 'dummy' option: id=dummy-meta_attributes-is-managed name=is-managed - =#=#=#= End test: Create another resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Create another resource meta attribute - =#=#=#= Begin test: Show why a resource is not running =#=#=#= --Resource dummy is not running --Configuration specifies 'dummy' should remain stopped -+ -+ -+ -+ -+ -+ - =#=#=#= End test: Show why a resource is not running - OK (0) =#=#=#= - * Passed: crm_resource - Show why a resource is not running - =#=#=#= Begin test: Remove another resource meta attribute =#=#=#= -diff --git a/cts/cts-cli.in b/cts/cts-cli.in -index 8565c485a..289ac966f 100755 ---- a/cts/cts-cli.in -+++ b/cts/cts-cli.in -@@ -657,8 +657,8 @@ function test_tools() { - test_assert_validate 
$CRM_EX_OK 0 - - desc="Show why a resource is not running" -- cmd="crm_resource -Y -r dummy" -- test_assert $CRM_EX_OK 0 -+ cmd="crm_resource -Y -r dummy --output-as=xml" -+ test_assert_validate $CRM_EX_OK 0 - - desc="Remove another resource meta attribute" - cmd="crm_resource -r dummy --meta -d target-role --output-as=xml" --- -2.31.1 - - -From 929d1b40e82f186e7e31e380db2620e7e23968f1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 10:43:22 -0500 -Subject: [PATCH 13/14] Low: schemas: update crm_resource --why schema for new - health check - ---- - xml/api/crm_resource-2.22.rng | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -index 2d2ba839f..8a4667559 100644 ---- a/xml/api/crm_resource-2.22.rng -+++ b/xml/api/crm_resource-2.22.rng -@@ -157,6 +157,9 @@ - - - -+ -+ true -+ - - - --- -2.31.1 - - -From 6630e55abc7b26be294ab6d42f12cdb7e2c69b55 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:07:20 -0500 -Subject: [PATCH 14/14] Test: cts-cli: add tests for checking resource status - on unhealthy node - ---- - cts/cli/regression.tools.exp | 112 ++++++++++++++++++++++++++++++++++- - cts/cts-cli.in | 12 ++++ - 2 files changed, 122 insertions(+), 2 deletions(-) - -diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp -index 4237a3ec5..89ae4e97d 100644 ---- a/cts/cli/regression.tools.exp -+++ b/cts/cli/regression.tools.exp -@@ -3406,13 +3406,14 @@ Removing constraint: cli-prefer-dummy - - =#=#=#= End test: Clear all implicit constraints for dummy - OK (0) =#=#=#= - * Passed: crm_resource - Clear all implicit constraints for dummy --=#=#=#= Begin test: Delete a resource =#=#=#= --=#=#=#= Current cib after: Delete a resource =#=#=#= -+=#=#=#= Begin test: Set a node health strategy =#=#=#= -+=#=#=#= Current cib after: Set a node health strategy =#=#=#= - - - - - -+ - - - -@@ -3427,6 +3428,113 @@ Removing constraint: cli-prefer-dummy - - - -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Set a node health strategy - OK (0) =#=#=#= -+* Passed: crm_attribute - Set a node health strategy -+=#=#=#= Begin test: Set a node health attribute =#=#=#= -+=#=#=#= Current cib after: Set a node health attribute =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Set a node health attribute - OK (0) =#=#=#= -+* Passed: crm_attribute - Set a node health attribute -+=#=#=#= Begin test: Show why a resource is not running on an unhealthy node =#=#=#= -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Show why a resource is not running on an unhealthy node - OK (0) =#=#=#= -+* Passed: crm_resource - Show why a resource is not running on an unhealthy node -+=#=#=#= Begin test: Delete a resource =#=#=#= -+=#=#=#= Current cib after: Delete a resource =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -diff --git a/cts/cts-cli.in b/cts/cts-cli.in -index 289ac966f..990d37cf7 100755 ---- a/cts/cts-cli.in -+++ b/cts/cts-cli.in -@@ -883,6 +883,18 @@ function test_tools() { - cmd="crm_resource -r dummy -U" - test_assert $CRM_EX_OK - -+ desc="Set a node health strategy" -+ cmd="crm_attribute -n node-health-strategy -v migrate-on-red" -+ test_assert $CRM_EX_OK -+ -+ desc="Set a node health attribute" -+ cmd="crm_attribute -N node3 -n '#health-cts-cli' -v red" -+ test_assert $CRM_EX_OK -+ -+ desc="Show why a resource is not running on an unhealthy node" -+ cmd="crm_resource -N node3 -Y -r dummy --output-as=xml" -+ test_assert_validate $CRM_EX_OK 0 -+ - desc="Delete a resource" - cmd="crm_resource -D -r dummy -t primitive" - test_assert $CRM_EX_OK --- -2.31.1 - diff --git a/SOURCES/007-stonith_admin.patch b/SOURCES/007-stonith_admin.patch deleted file mode 100644 index bddba16..0000000 --- a/SOURCES/007-stonith_admin.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 
d6294dd28b6d95ad3844824996717f9959d97ac6 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Thu, 30 Jun 2022 11:07:32 -0700 -Subject: [PATCH 1/2] Fix: Use correct boolean in stonith__validate_agent_xml - -This fixes a regression introduced by 91a2b2e that flips the boolean -values for "valid" in the XML output. - -Resolves: RHBZ#2102292 (partial) - -Signed-off-by: Reid Wahl ---- - lib/fencing/st_output.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c -index e0ff848c2..eb10ad0c5 100644 ---- a/lib/fencing/st_output.c -+++ b/lib/fencing/st_output.c -@@ -528,10 +528,9 @@ validate_agent_xml(pcmk__output_t *out, va_list args) { - char *error_output = va_arg(args, char *); - int rc = va_arg(args, int); - -- xmlNodePtr node = pcmk__output_create_xml_node(out, "validate", -- "agent", agent, -- "valid", pcmk__btoa(rc), -- NULL); -+ xmlNodePtr node = pcmk__output_create_xml_node( -+ out, "validate", "agent", agent, "valid", pcmk__btoa(rc == pcmk_ok), -+ NULL); - - if (device != NULL) { - crm_xml_add(node, "device", device); --- -2.31.1 - - -From 81e83683e69b4f147f40f5353f8e68032758a104 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Wed, 29 Jun 2022 18:15:33 -0700 -Subject: [PATCH 2/2] Fix: Use failed action result in rhcs_validate and - _get_metadata - -If an action failed but has a non-NULL result, get the rc and other -attributes from that result. - -This fixes a regression introduced by b441925, in which failure XML -output now contains a CRM_EX_CONNECTED rc instead of the correct one and -does not contain stdout/stderr. That commit caused -services__execute_file() to return a proper rc instead of TRUE. A -non-pcmk_ok bubbled up the call chain causing -internal_stonith_action_execute() to return -ECONNABORTED. Then -rhcs_validate() and _get_metadata() would use this rc instead of the one -attached to the result. 
- -Resolves: RHBZ#2102292 - -Signed-off-by: Reid Wahl ---- - lib/fencing/st_rhcs.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c -index 39485013e..029c97eea 100644 ---- a/lib/fencing/st_rhcs.c -+++ b/lib/fencing/st_rhcs.c -@@ -130,16 +130,15 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) - stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, - 5, NULL, NULL, NULL); - int rc = stonith__execute(action); -+ result = stonith__action_result(action); - -- if (rc < 0) { -+ if (rc < 0 && result == NULL) { - crm_warn("Could not execute metadata action for %s: %s " - CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); - stonith__destroy_action(action); - return rc; - } - -- result = stonith__action_result(action); -- - if (result->execution_status != PCMK_EXEC_DONE) { - crm_warn("Could not execute metadata action for %s: %s", - agent, pcmk_exec_status_str(result->execution_status)); -@@ -262,6 +261,7 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, - int remaining_timeout = timeout; - xmlNode *metadata = NULL; - stonith_action_t *action = NULL; -+ pcmk__action_result_t *result = NULL; - - if (host_arg == NULL) { - time_t start_time = time(NULL); -@@ -298,9 +298,9 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, - NULL, host_arg); - - rc = stonith__execute(action); -- if (rc == pcmk_ok) { -- pcmk__action_result_t *result = stonith__action_result(action); -+ result = stonith__action_result(action); - -+ if (result != NULL) { - rc = pcmk_rc2legacy(stonith__result2rc(result)); - - // Take ownership of output so stonith__destroy_action() doesn't free it --- -2.31.1 - diff --git a/SOURCES/008-metadata.patch b/SOURCES/008-metadata.patch deleted file mode 100644 index 5dc9e27..0000000 --- a/SOURCES/008-metadata.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 
e4d9c795dfe2d6737c777a265292864da98dae8f Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Thu, 30 Jun 2022 14:40:31 -0700 -Subject: [PATCH] Low: Always null-check result in stonith__rhcs_get_metadata - -Null-check result even if rc == 0. - -Signed-off-by: Reid Wahl ---- - lib/fencing/st_rhcs.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c -index 029c97eea..dfccff2cb 100644 ---- a/lib/fencing/st_rhcs.c -+++ b/lib/fencing/st_rhcs.c -@@ -132,9 +132,11 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) - int rc = stonith__execute(action); - result = stonith__action_result(action); - -- if (rc < 0 && result == NULL) { -- crm_warn("Could not execute metadata action for %s: %s " -- CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); -+ if (result == NULL) { -+ if (rc < 0) { -+ crm_warn("Could not execute metadata action for %s: %s " -+ CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); -+ } - stonith__destroy_action(action); - return rc; - } --- -2.31.1 - diff --git a/SOURCES/009-validate.patch b/SOURCES/009-validate.patch deleted file mode 100644 index a5d01f5..0000000 --- a/SOURCES/009-validate.patch +++ /dev/null @@ -1,94 +0,0 @@ -From d00a6abde7e6a41f8bc6085c875cb8072aff499b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 30 Jun 2022 09:25:05 -0400 -Subject: [PATCH 1/2] Fix: libstonithd: Add the "Agent not found..." message to - formatted output. 
- ---- - lib/fencing/st_client.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 137642af7..971bbe9a5 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -1763,9 +1763,14 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, - default: - rc = -EINVAL; - errno = EINVAL; -- crm_perror(LOG_ERR, -- "Agent %s not found or does not support validation", -- agent); -+ -+ if (error_output) { -+ *error_output = crm_strdup_printf("Agent %s not found or does not support validation", -+ agent); -+ } else { -+ crm_err("Agent %s not found or does not support validation", agent); -+ } -+ - break; - } - g_hash_table_destroy(params_table); --- -2.31.1 - - -From f3a5fc961c30556b975011773e4cebf323bec38e Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 1 Jul 2022 10:38:45 -0400 -Subject: [PATCH 2/2] Refactor: libstonithd: Split apart error conditions when - validating. - -The "not found" and "can't validate" cases were previously jumbled -together. Now, return ENOENT if the agent is not found and EOPNOTSUPP -if it can't validate. The only caller appears to be handling both cases -correctly already, so no changes are needed there. 
---- - lib/fencing/st_client.c | 21 +++++++++++++++++---- - 1 file changed, 17 insertions(+), 4 deletions(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 971bbe9a5..192334812 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -1760,19 +1760,32 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, - break; - #endif - -+ case st_namespace_invalid: -+ errno = ENOENT; -+ rc = -errno; -+ -+ if (error_output) { -+ *error_output = crm_strdup_printf("Agent %s not found", agent); -+ } else { -+ crm_err("Agent %s not found", agent); -+ } -+ -+ break; -+ - default: -- rc = -EINVAL; -- errno = EINVAL; -+ errno = EOPNOTSUPP; -+ rc = -errno; - - if (error_output) { -- *error_output = crm_strdup_printf("Agent %s not found or does not support validation", -+ *error_output = crm_strdup_printf("Agent %s does not support validation", - agent); - } else { -- crm_err("Agent %s not found or does not support validation", agent); -+ crm_err("Agent %s does not support validation", agent); - } - - break; - } -+ - g_hash_table_destroy(params_table); - return rc; - } --- -2.31.1 - diff --git a/SOURCES/010-regression.patch b/SOURCES/010-regression.patch deleted file mode 100644 index e40ff0e..0000000 --- a/SOURCES/010-regression.patch +++ /dev/null @@ -1,47 +0,0 @@ -From e5f80059c7f1c0ad3264dc2a2a61e64cded0fe0f Mon Sep 17 00:00:00 2001 -From: Hideo Yamauchi -Date: Tue, 12 Jul 2022 14:45:55 +0900 -Subject: [PATCH] High: scheduler: Resolves an issue where STONITH devices - cannot be registered. 
- ---- - lib/pacemaker/pcmk_sched_allocate.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c -index 85df6ace8..a7fe9c8d6 100644 ---- a/lib/pacemaker/pcmk_sched_allocate.c -+++ b/lib/pacemaker/pcmk_sched_allocate.c -@@ -724,12 +724,18 @@ log_unrunnable_actions(pe_working_set_t *data_set) - static void - unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) - { -+ const char* localhost_save = NULL; -+ - if (pcmk_is_set(data_set->flags, pe_flag_have_status)) { - crm_trace("Reusing previously calculated cluster status"); - pe__set_working_set_flags(data_set, flags); - return; - } - -+ if (data_set->localhost) { -+ localhost_save = data_set->localhost; -+ } -+ - CRM_ASSERT(cib != NULL); - crm_trace("Calculating cluster status"); - -@@ -740,6 +746,10 @@ unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) - */ - set_working_set_defaults(data_set); - -+ if (localhost_save) { -+ data_set->localhost = localhost_save; -+ } -+ - pe__set_working_set_flags(data_set, flags); - data_set->input = cib; - cluster_status(data_set); // Sets pe_flag_have_status --- -2.31.1 - diff --git a/SOURCES/011-unfencing.patch b/SOURCES/011-unfencing.patch deleted file mode 100644 index 01255df..0000000 --- a/SOURCES/011-unfencing.patch +++ /dev/null @@ -1,178 +0,0 @@ -From b1094468ab0f7c6d2f5b457b721f3a852a9cae2c Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 14 Jul 2022 13:09:51 +0200 -Subject: [PATCH 1/2] Fix: do unfencing equally for cluster-nodes & remotes - -Fixes T28 ---- - lib/pengine/utils.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 0c2eb3c16..83f76cccf 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -1201,12 +1201,8 @@ pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, - add_hash_param(stonith_op->meta, 
XML_LRM_ATTR_TARGET_UUID, node->details->id); - add_hash_param(stonith_op->meta, "stonith_action", op); - -- if (pe__is_guest_or_remote_node(node) -- && pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { -- /* Extra work to detect device changes on remotes -- * -- * We may do this for all nodes in the future, but for now -- * the pcmk__check_action_config() based stuff works fine. -+ if (pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { -+ /* Extra work to detect device changes - */ - long max = 1024; - long digests_all_offset = 0; --- -2.31.1 - - -From f5db6e2c94273623a49f36f1bdb6c39315c53cab Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 14 Jul 2022 11:29:05 +0200 -Subject: [PATCH 2/2] Test: cts-scheduler: update expected output for changes - in unfencing - ---- - cts/scheduler/exp/start-then-stop-with-unfence.exp | 2 +- - cts/scheduler/exp/unfence-definition.exp | 6 +++--- - cts/scheduler/exp/unfence-device.exp | 6 +++--- - cts/scheduler/exp/unfence-parameters.exp | 6 +++--- - cts/scheduler/exp/unfence-startup.exp | 4 ++-- - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/cts/scheduler/exp/start-then-stop-with-unfence.exp b/cts/scheduler/exp/start-then-stop-with-unfence.exp -index b1868586f..69cfb63de 100644 ---- a/cts/scheduler/exp/start-then-stop-with-unfence.exp -+++ b/cts/scheduler/exp/start-then-stop-with-unfence.exp -@@ -151,7 +151,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-definition.exp b/cts/scheduler/exp/unfence-definition.exp -index 840a8d212..6a098ed3c 100644 ---- a/cts/scheduler/exp/unfence-definition.exp -+++ b/cts/scheduler/exp/unfence-definition.exp -@@ -373,7 +373,7 @@ - - - -- -+ - - - -@@ -384,7 +384,7 @@ - - - -- -+ - - - -@@ -392,7 +392,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-device.exp b/cts/scheduler/exp/unfence-device.exp -index a39fc758f..452351d98 100644 ---- a/cts/scheduler/exp/unfence-device.exp -+++ b/cts/scheduler/exp/unfence-device.exp -@@ 
-76,7 +76,7 @@ - - - -- -+ - - - -@@ -84,7 +84,7 @@ - - - -- -+ - - - -@@ -92,7 +92,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-parameters.exp b/cts/scheduler/exp/unfence-parameters.exp -index 3e70cb8e9..268bf008e 100644 ---- a/cts/scheduler/exp/unfence-parameters.exp -+++ b/cts/scheduler/exp/unfence-parameters.exp -@@ -357,7 +357,7 @@ - - - -- -+ - - - -@@ -368,7 +368,7 @@ - - - -- -+ - - - -@@ -376,7 +376,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-startup.exp b/cts/scheduler/exp/unfence-startup.exp -index 6745bff4b..f2d38e80c 100644 ---- a/cts/scheduler/exp/unfence-startup.exp -+++ b/cts/scheduler/exp/unfence-startup.exp -@@ -173,7 +173,7 @@ - - - -- -+ - - - -@@ -184,7 +184,7 @@ - - - -- -+ - - - --- -2.31.1 - diff --git a/SOURCES/012-crm_resource.patch b/SOURCES/012-crm_resource.patch deleted file mode 100644 index a087b3f..0000000 --- a/SOURCES/012-crm_resource.patch +++ /dev/null @@ -1,38 +0,0 @@ -From fe9150bc4b740b3748fec34fe668df4f8c0d0e25 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 9 Aug 2022 15:38:03 -0500 -Subject: [PATCH] Fix: tools: correct minimum execution status shown by - crm_resource -O - -regression introduced in 2.1.0 by 5ef28b946 - -Fixes T533 ---- - lib/pengine/pe_output.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c -index 5d716fe6cb..dbb49637c9 100644 ---- a/lib/pengine/pe_output.c -+++ b/lib/pengine/pe_output.c -@@ -1878,7 +1878,7 @@ node_and_op(pcmk__output_t *out, va_list args) { - time_t last_change = 0; - - pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), -- &status, 0); -+ &status, PCMK_EXEC_UNKNOWN); - - rsc = pe_find_resource(data_set->resources, op_rsc); - -@@ -1932,7 +1932,7 @@ node_and_op_xml(pcmk__output_t *out, va_list args) { - xmlNode *node = NULL; - - pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), -- &status, 0); -+ &status, PCMK_EXEC_UNKNOWN); - node = 
pcmk__output_create_xml_node(out, "operation", - "op", op_key ? op_key : ID(xml_op), - "node", crm_element_value(xml_op, XML_ATTR_UNAME), --- -2.31.1 - diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 1d81429..2be7c1f 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -35,11 +35,11 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 2.1.4 -%global specversion 5 +%global pcmkversion 2.1.5 +%global specversion 7 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build -%global commit dc6eb4362e67c1497a413434eba097063bf1ef83 +%global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. @@ -65,9 +65,6 @@ ## Add option to create binaries suitable for use with profiling tools %bcond_with profiling -## Add option to create binaries with coverage analysis -%bcond_with coverage - ## Allow deprecated option to skip (or enable, on RHEL) documentation %if 0%{?rhel} %bcond_with doc @@ -124,9 +121,14 @@ %define archive_version %(c=%{commit}; echo ${c:10}) %define archive_github_url %{commit}#/%{name}-%{archive_version}.tar.gz %else +%if "%{commit}" == "DIST" +%define archive_version %{pcmkversion} +%define archive_github_url %{archive_version}#/%{name}-%{pcmkversion}.tar.gz +%else %define archive_version %(c=%{commit}; echo ${c:0:%{commit_abbrev}}) %define archive_github_url %{archive_version}#/%{name}-%{archive_version}.tar.gz %endif +%endif ### Always use a simple release number %define pcmk_release %{specversion} @@ -246,18 +248,11 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url} # upstream 
commits -Patch001: 001-stonith-enabled.patch -Patch002: 002-acl_group.patch -Patch003: 003-regression.patch -Patch004: 004-schema.patch -Patch005: 005-schema.patch -Patch006: 006-crm_resource.patch -Patch007: 007-stonith_admin.patch -Patch008: 008-metadata.patch -Patch009: 009-validate.patch -Patch010: 010-regression.patch -Patch011: 011-unfencing.patch -Patch012: 012-crm_resource.patch +Patch001: 001-sync-points.patch +Patch002: 002-remote-regression.patch +Patch003: 003-history-cleanup.patch +Patch004: 004-g_source_remove.patch +Patch005: 005-query-null.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -336,6 +331,9 @@ BuildRequires: inkscape BuildRequires: %{python_name}-sphinx %endif +# Booth requires this +Provides: pacemaker-ticket-support = 2.0 + Provides: pcmk-cluster-manager = %{version}-%{release} Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} @@ -357,7 +355,8 @@ when related resources fail and can be configured to periodically check resource health. 
Available rpmbuild rebuild options: - --with(out) : cibsecrets doc hardening nls pre_release profiling stonithd + --with(out) : cibsecrets hardening nls pre_release profiling + stonithd %package cli License: GPLv2+ and LGPLv2+ @@ -461,6 +460,7 @@ Requires: %{pkgname_pcmk_libs} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} Requires: %{pkgname_procps} Requires: psmisc +Requires: %{python_name}-psutil BuildArch: noarch # systemd Python bindings are a separate package in some distros @@ -855,6 +855,43 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Wed Feb 22 2023 Chris Lumens - 2.1.5-7 +- Additional fixes for SIGABRT during pacemaker-fenced shutdown +- Backport fix for attrd_updater -QA not displaying all nodes +- Related: rhbz2166967 +- Resolves: rhbz2169829 + +* Thu Feb 9 2023 Chris Lumens - 2.1.5-6 +- Backport fix for migration history cleanup causing resource recovery +- Backport fix for SIGABRT during pacemaker-fenced shutdown +- Resolves: rhbz2166393 +- Resolves: rhbz2166967 + +* Tue Jan 24 2023 Ken Gaillot - 2.1.5-5 +- Backport fix for remote node shutdown regression +- Resolves: rhbz2163450 + +* Mon Dec 12 2022 Chris Lumens - 2.1.5-4 +- Rebase pacemaker on upstream 2.1.5 final release +- Add support for sync points to attribute daemon +- Resolves: rhbz2122353 + +* Tue Dec 06 2022 Chris Lumens - 2.1.5-3 +- Fix errors found by covscan +- Related: rhbz2122353 + +* Wed Nov 23 2022 Chris Lumens - 2.1.5-2 +- Rebase on upstream 2.1.5-rc3 release +- Related: rhbz2122353 + +* Tue Nov 15 2022 Chris Lumens - 2.1.5-1 +- Rebase on upstream 2.1.5-rc2 release +- Resolves: rhbz2123727 +- Resolves: rhbz2125337 +- Resolves: rhbz2125344 +- Resolves: rhbz2133546 +- Resolves: rhbz2142683 + * Wed Aug 10 2022 Ken Gaillot - 2.1.4-5 - Fix regression in crm_resource -O - Resolves: rhbz2089353