From 6db78713b210f5d20759c1a7fcd17a78b110cd04 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 10 2021 04:24:36 +0000 Subject: import pacemaker-2.1.0-5.el8 --- diff --git a/SOURCES/014-str-list.patch b/SOURCES/014-str-list.patch new file mode 100644 index 0000000..e6993ab --- /dev/null +++ b/SOURCES/014-str-list.patch @@ -0,0 +1,465 @@ +From 45813df3eb4c8ad8b1744fa5dd56af86ad0fb3dd Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 17 Jun 2021 16:07:55 -0400 +Subject: [PATCH] Refactor: libs: pcmk__str_in_list should support pcmk__str_* + flags. + +--- + include/crm/common/strings_internal.h | 2 +- + lib/common/strings.c | 34 +++++++++++++++++++++++---- + lib/fencing/st_output.c | 10 ++++---- + lib/pengine/bundle.c | 8 +++---- + lib/pengine/clone.c | 28 +++++++++++----------- + lib/pengine/group.c | 18 +++++++------- + lib/pengine/native.c | 4 ++-- + lib/pengine/pe_output.c | 22 ++++++++--------- + lib/pengine/utils.c | 6 ++--- + 9 files changed, 79 insertions(+), 53 deletions(-) + +diff --git a/include/crm/common/strings_internal.h b/include/crm/common/strings_internal.h +index 94982cb4e..687079814 100644 +--- a/include/crm/common/strings_internal.h ++++ b/include/crm/common/strings_internal.h +@@ -117,7 +117,7 @@ pcmk__intkey_table_remove(GHashTable *hash_table, int key) + return g_hash_table_remove(hash_table, GINT_TO_POINTER(key)); + } + +-gboolean pcmk__str_in_list(GList *lst, const gchar *s); ++gboolean pcmk__str_in_list(GList *lst, const gchar *s, uint32_t flags); + + bool pcmk__strcase_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; + bool pcmk__str_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; +diff --git a/lib/common/strings.c b/lib/common/strings.c +index 3264db5b6..e1e98803b 100644 +--- a/lib/common/strings.c ++++ b/lib/common/strings.c +@@ -872,14 +872,30 @@ pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end) + * Search \p lst for \p s, taking case into account. As a special case, + * if "*" is the only element of \p lst, the search is successful. + * +- * \param[in] lst List to search +- * \param[in] s String to search for ++ * Behavior can be changed with various flags: ++ * ++ * - pcmk__str_casei - By default, comparisons are done taking case into ++ * account. This flag makes comparisons case-insensitive. ++ * - pcmk__str_null_matches - If the input string is NULL, return TRUE. ++ * ++ * \note The special "*" matching rule takes precedence over flags. In ++ * particular, "*" will match a NULL input string even without ++ * pcmk__str_null_matches being specified. ++ * ++ * \note No matter what input string or flags are provided, an empty ++ * list will always return FALSE. ++ * ++ * \param[in] lst List to search ++ * \param[in] s String to search for ++ * \param[in] flags A bitfield of pcmk__str_flags to modify operation + * + * \return \c TRUE if \p s is in \p lst, or \c FALSE otherwise + */ + gboolean +-pcmk__str_in_list(GList *lst, const gchar *s) ++pcmk__str_in_list(GList *lst, const gchar *s, uint32_t flags) + { ++ GCompareFunc fn; ++ + if (lst == NULL) { + return FALSE; + } +@@ -888,7 +904,17 @@ pcmk__str_in_list(GList *lst, const gchar *s) + return TRUE; + } + +- return g_list_find_custom(lst, s, (GCompareFunc) strcmp) != NULL; ++ if (s == NULL) { ++ return pcmk_is_set(flags, pcmk__str_null_matches); ++ } ++ ++ if (pcmk_is_set(flags, pcmk__str_casei)) { ++ fn = (GCompareFunc) strcasecmp; ++ } else { ++ fn = (GCompareFunc) strcmp; ++ } ++ ++ return g_list_find_custom(lst, s, fn) != NULL; + } + + static bool +diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c +index 568ae46a8..e1ae8ac87 100644 +--- a/lib/fencing/st_output.c ++++ b/lib/fencing/st_output.c +@@ -47,7 +47,7 @@ stonith__failed_history(pcmk__output_t *out, va_list args) { + continue; + } + +- if (!pcmk__str_in_list(only_node, hp->target)) { ++ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { + continue; + } + +@@ -72,7 +72,7 @@ stonith__history(pcmk__output_t *out, va_list args) { + int rc = pcmk_rc_no_output; + + for (stonith_history_t *hp = history; hp; hp = hp->next) { +- if (!pcmk__str_in_list(only_node, hp->target)) { ++ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { + continue; + } + +@@ -101,7 +101,7 @@ stonith__full_history(pcmk__output_t *out, va_list args) { + int rc = pcmk_rc_no_output; + + for (stonith_history_t *hp = history; hp; hp = hp->next) { +- if (!pcmk__str_in_list(only_node, hp->target)) { ++ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { + continue; + } + +@@ -129,7 +129,7 @@ full_history_xml(pcmk__output_t *out, va_list args) { + + if (history_rc == 0) { + for (stonith_history_t *hp = history; hp; hp = hp->next) { +- if (!pcmk__str_in_list(only_node, hp->target)) { ++ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { + continue; + } + +@@ -218,7 +218,7 @@ stonith__pending_actions(pcmk__output_t *out, va_list args) { + int rc = pcmk_rc_no_output; + + for (stonith_history_t *hp = history; hp; hp = hp->next) { +- if (!pcmk__str_in_list(only_node, hp->target)) { ++ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { + continue; + } + +diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c +index 9237392e4..6ba786ae6 100644 +--- a/lib/pengine/bundle.c ++++ b/lib/pengine/bundle.c +@@ -1492,7 +1492,7 @@ pe__bundle_xml(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc->id); ++ print_everything = pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { +@@ -1614,7 +1614,7 @@ pe__bundle_html(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc->id); ++ print_everything = pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { +@@ -1742,7 +1742,7 @@ pe__bundle_text(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc->id); ++ print_everything = pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none); + + for (GList *gIter = bundle_data->replicas; gIter != NULL; + gIter = gIter->next) { +@@ -2044,7 +2044,7 @@ pe__bundle_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_paren + gboolean passes = FALSE; + pe__bundle_variant_data_t *bundle_data = NULL; + +- if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { ++ if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none)) { + passes = TRUE; + } else { + get_bundle_variant_data(bundle_data, rsc); +diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c +index 5662338f3..5a6bfa61f 100644 +--- a/lib/pengine/clone.c ++++ b/lib/pengine/clone.c +@@ -624,8 +624,8 @@ pe__clone_xml(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); ++ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); + + for (; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; +@@ -693,8 +693,8 @@ pe__clone_html(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); ++ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); + + out->begin_list(out, NULL, NULL, "Clone Set: %s [%s]%s%s%s%s", + rsc->id, ID(clone_data->xml_obj_child), +@@ -801,7 +801,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) + for (gIter = promoted_list; gIter; gIter = gIter->next) { + pe_node_t *host = gIter->data; + +- if (!pcmk__str_in_list(only_node, host->details->uname)) { ++ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { + continue; + } + +@@ -822,7 +822,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) + for (gIter = started_list; gIter; gIter = gIter->next) { + pe_node_t *host = gIter->data; + +- if (!pcmk__str_in_list(only_node, host->details->uname)) { ++ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { + continue; + } + +@@ -884,7 +884,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) + pe_node_t *node = (pe_node_t *)nIter->data; + + if (pe_find_node(rsc->running_on, node->details->uname) == NULL && +- pcmk__str_in_list(only_node, node->details->uname)) { ++ pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + pcmk__add_word(&stopped_list, &stopped_list_len, + node->details->uname); + } +@@ -933,8 +933,8 @@ pe__clone_text(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); ++ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); + + out->begin_list(out, NULL, NULL, "Clone Set: %s [%s]%s%s%s%s", + rsc->id, ID(clone_data->xml_obj_child), +@@ -1041,7 +1041,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) + for (gIter = promoted_list; gIter; gIter = gIter->next) { + pe_node_t *host = gIter->data; + +- if (!pcmk__str_in_list(only_node, host->details->uname)) { ++ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { + continue; + } + +@@ -1062,7 +1062,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) + for (gIter = started_list; gIter; gIter = gIter->next) { + pe_node_t *host = gIter->data; + +- if (!pcmk__str_in_list(only_node, host->details->uname)) { ++ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { + continue; + } + +@@ -1120,7 +1120,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) + pe_node_t *node = (pe_node_t *)nIter->data; + + if (pe_find_node(rsc->running_on, node->details->uname) == NULL && +- pcmk__str_in_list(only_node, node->details->uname)) { ++ pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + pcmk__add_word(&stopped_list, &stopped_list_len, + node->details->uname); + } +@@ -1220,11 +1220,11 @@ pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent + gboolean passes = FALSE; + clone_variant_data_t *clone_data = NULL; + +- if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { ++ if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none)) { + passes = TRUE; + } else { + get_clone_variant_data(clone_data, rsc); +- passes = pcmk__str_in_list(only_rsc, ID(clone_data->xml_obj_child)); ++ passes = pcmk__str_in_list(only_rsc, ID(clone_data->xml_obj_child), pcmk__str_none); + + if (!passes) { + for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { +diff --git a/lib/pengine/group.c b/lib/pengine/group.c +index 23a72cff7..5f9aa83ce 100644 +--- a/lib/pengine/group.c ++++ b/lib/pengine/group.c +@@ -201,8 +201,8 @@ pe__group_xml(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); ++ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); + + for (; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; +@@ -248,8 +248,8 @@ pe__group_html(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); ++ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); + + if (options & pe_print_brief) { + GList *rscs = pe__filter_rsc_list(rsc->children, only_rsc); +@@ -303,8 +303,8 @@ pe__group_text(pcmk__output_t *out, va_list args) + return rc; + } + +- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); ++ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); + + if (options & pe_print_brief) { + GList *rscs = pe__filter_rsc_list(rsc->children, only_rsc); +@@ -387,11 +387,11 @@ pe__group_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent + { + gboolean passes = FALSE; + +- if (check_parent && pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)))) { ++ if (check_parent && pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)), pcmk__str_none)) { + passes = TRUE; +- } else if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { ++ } else if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none)) { + passes = TRUE; +- } else if (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)) { ++ } else if (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)) { + passes = TRUE; + } else { + for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index c2333d0d2..56054fc4a 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -1338,8 +1338,8 @@ pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, unsigned int show_op + gboolean + pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent) + { +- if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || +- pcmk__str_in_list(only_rsc, rsc->id)) { ++ if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || ++ pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)) { + return FALSE; + } else if (check_parent) { + pe_resource_t *up = uber_parent(rsc); +diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c +index 727475735..a6dc4ade8 100644 +--- a/lib/pengine/pe_output.c ++++ b/lib/pengine/pe_output.c +@@ -670,8 +670,8 @@ ban_list(pcmk__output_t *out, va_list args) { + continue; + } + +- if (!pcmk__str_in_list(only_rsc, rsc_printable_id(location->rsc_lh)) && +- !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(location->rsc_lh)))) { ++ if (!pcmk__str_in_list(only_rsc, rsc_printable_id(location->rsc_lh), pcmk__str_none) && ++ !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(location->rsc_lh)), pcmk__str_none)) { + continue; + } + +@@ -1254,7 +1254,7 @@ failed_action_list(pcmk__output_t *out, va_list args) { + xml_op = pcmk__xml_next(xml_op)) { + char *rsc = NULL; + +- if (!pcmk__str_in_list(only_node, crm_element_value(xml_op, XML_ATTR_UNAME))) { ++ if (!pcmk__str_in_list(only_node, crm_element_value(xml_op, XML_ATTR_UNAME), pcmk__str_none)) { + continue; + } + +@@ -1263,7 +1263,7 @@ failed_action_list(pcmk__output_t *out, va_list args) { + continue; + } + +- if (!pcmk__str_in_list(only_rsc, rsc)) { ++ if (!pcmk__str_in_list(only_rsc, rsc, pcmk__str_none)) { + free(rsc); + continue; + } +@@ -1738,7 +1738,7 @@ node_attribute_list(pcmk__output_t *out, va_list args) { + continue; + } + +- if (!pcmk__str_in_list(only_node, node->details->uname)) { ++ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + g_list_free(attr_list); + continue; + } +@@ -1835,8 +1835,8 @@ node_history_list(pcmk__output_t *out, va_list args) { + * For other resource types, is_filtered is okay. + */ + if (uber_parent(rsc)->variant == pe_group) { +- if (!pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) && +- !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)))) { ++ if (!pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) && ++ !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)), pcmk__str_none)) { + continue; + } + } else { +@@ -1899,7 +1899,7 @@ node_list_html(pcmk__output_t *out, va_list args) { + for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { + pe_node_t *node = (pe_node_t *) gIter->data; + +- if (!pcmk__str_in_list(only_node, node->details->uname)) { ++ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + continue; + } + +@@ -1940,7 +1940,7 @@ pe__node_list_text(pcmk__output_t *out, va_list args) { + const char *node_mode = NULL; + char *node_name = pe__node_display_name(node, print_clone_detail); + +- if (!pcmk__str_in_list(only_node, node->details->uname)) { ++ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + free(node_name); + continue; + } +@@ -2059,7 +2059,7 @@ node_list_xml(pcmk__output_t *out, va_list args) { + for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { + pe_node_t *node = (pe_node_t *) gIter->data; + +- if (!pcmk__str_in_list(only_node, node->details->uname)) { ++ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + continue; + } + +@@ -2097,7 +2097,7 @@ node_summary(pcmk__output_t *out, va_list args) { + continue; + } + +- if (!pcmk__str_in_list(only_node, node->details->uname)) { ++ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { + continue; + } + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 450d8348c..d1be9e4ca 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -2394,7 +2394,7 @@ pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GList *node_list) + { + for (GList *ele = rsc->running_on; ele; ele = ele->next) { + pe_node_t *node = (pe_node_t *) ele->data; +- if (pcmk__str_in_list(node_list, node->details->uname)) { ++ if (pcmk__str_in_list(node_list, node->details->uname, pcmk__str_none)) { + return true; + } + } +@@ -2419,8 +2419,8 @@ pe__filter_rsc_list(GList *rscs, GList *filter) + /* I think the second condition is safe here for all callers of this + * function. If not, it needs to move into pe__node_text. + */ +- if (pcmk__str_in_list(filter, rsc_printable_id(rsc)) || +- (rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent)))) { ++ if (pcmk__str_in_list(filter, rsc_printable_id(rsc), pcmk__str_none) || ++ (rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent), pcmk__str_none))) { + retval = g_list_prepend(retval, rsc); + } + } +-- +2.27.0 + diff --git a/SOURCES/015-sbd.patch b/SOURCES/015-sbd.patch new file mode 100644 index 0000000..9f47c35 --- /dev/null +++ b/SOURCES/015-sbd.patch @@ -0,0 +1,1312 @@ +From b49f49576ef9d801a48ce7a01a78c72e65be7880 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 30 Jul 2021 18:07:25 +0200 +Subject: [PATCH 1/3] Fix, Refactor: fenced: add return value to + get_agent_metadata + +Used to distinguish between empty metadata per design, +case of failed getting metadata that might succeed on a +retry and fatal failure. +Fixes as well regression that leads to endless retries getting +metadata for #watchdog - not superserious as it happens with +delays in between but still undesirable. +--- + daemons/fenced/fenced_commands.c | 92 +++++++++++++++++++------------- + 1 file changed, 55 insertions(+), 37 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index a778801b1..cd9968f1a 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -69,7 +69,7 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re + static void search_devices_record_result(struct device_search_s *search, const char *device, + gboolean can_fence); + +-static xmlNode * get_agent_metadata(const char *agent); ++static int get_agent_metadata(const char *agent, xmlNode **metadata); + static void read_action_metadata(stonith_device_t *device); + + typedef struct async_command_s { +@@ -323,19 +323,26 @@ fork_cb(GPid pid, gpointer user_data) + static int + get_agent_metadata_cb(gpointer data) { + stonith_device_t *device = data; ++ guint period_ms; + +- device->agent_metadata = get_agent_metadata(device->agent); +- if (device->agent_metadata) { +- read_action_metadata(device); +- stonith__device_parameter_flags(&(device->flags), device->id, ++ switch (get_agent_metadata(device->agent, &device->agent_metadata)) { ++ case pcmk_rc_ok: ++ if (device->agent_metadata) { ++ read_action_metadata(device); ++ stonith__device_parameter_flags(&(device->flags), device->id, + device->agent_metadata); +- return G_SOURCE_REMOVE; +- } else { +- guint period_ms = pcmk__mainloop_timer_get_period(device->timer); +- if (period_ms < 160 * 1000) { +- mainloop_timer_set_period(device->timer, 2 * period_ms); +- } +- return G_SOURCE_CONTINUE; ++ } ++ return G_SOURCE_REMOVE; ++ ++ case EAGAIN: ++ period_ms = pcmk__mainloop_timer_get_period(device->timer); ++ if (period_ms < 160 * 1000) { ++ mainloop_timer_set_period(device->timer, 2 * period_ms); ++ } ++ return G_SOURCE_CONTINUE; ++ ++ default: ++ return G_SOURCE_REMOVE; + } + } + +@@ -700,38 +707,41 @@ init_metadata_cache(void) { + } + } + +-static xmlNode * +-get_agent_metadata(const char *agent) ++int ++get_agent_metadata(const char *agent, xmlNode ** metadata) + { +- xmlNode *xml = NULL; + char *buffer = NULL; + ++ if (metadata == NULL) { ++ return EINVAL; ++ } ++ *metadata = NULL; ++ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { ++ return pcmk_rc_ok; ++ } + init_metadata_cache(); + buffer = g_hash_table_lookup(metadata_cache, agent); +- if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) { +- return NULL; +- +- } else if(buffer == NULL) { ++ if (buffer == NULL) { + stonith_t *st = stonith_api_new(); + int rc; + + if (st == NULL) { + crm_warn("Could not get agent meta-data: " + "API memory allocation failed"); +- return NULL; ++ return EAGAIN; + } +- rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); ++ rc = st->cmds->metadata(st, st_opt_sync_call, agent, ++ NULL, &buffer, 10); + stonith_api_delete(st); + if (rc || !buffer) { + crm_err("Could not retrieve metadata for fencing agent %s", agent); +- return NULL; ++ return EAGAIN; + } + g_hash_table_replace(metadata_cache, strdup(agent), buffer); + } + +- xml = string2xml(buffer); +- +- return xml; ++ *metadata = string2xml(buffer); ++ return pcmk_rc_ok; + } + + static gboolean +@@ -962,19 +972,27 @@ build_device_from_xml(xmlNode * msg) + g_list_free_full(device->targets, free); + device->targets = NULL; + } +- device->agent_metadata = get_agent_metadata(device->agent); +- if (device->agent_metadata) { +- read_action_metadata(device); +- stonith__device_parameter_flags(&(device->flags), device->id, +- device->agent_metadata); +- } else { +- if (device->timer == NULL) { +- device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, ++ switch (get_agent_metadata(device->agent, &device->agent_metadata)) { ++ case pcmk_rc_ok: ++ if (device->agent_metadata) { ++ read_action_metadata(device); ++ stonith__device_parameter_flags(&(device->flags), device->id, ++ device->agent_metadata); ++ } ++ break; ++ ++ case EAGAIN: ++ if (device->timer == NULL) { ++ device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, + TRUE, get_agent_metadata_cb, device); +- } +- if (!mainloop_timer_running(device->timer)) { +- mainloop_timer_start(device->timer); +- } ++ } ++ if (!mainloop_timer_running(device->timer)) { ++ mainloop_timer_start(device->timer); ++ } ++ break; ++ ++ default: ++ break; + } + + value = g_hash_table_lookup(device->params, "nodeid"); +-- +2.27.0 + + +From 5dd1e4459335764e0adf5fa78d81c875ae2332e9 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 30 Jul 2021 18:15:10 +0200 +Subject: [PATCH 2/3] feature: watchdog-fencing: allow restriction to certain + nodes + +Bump CRM_FEATURE_SET to 3.11.0 to encourage cluster being +fully upgraded to a version that supports the feature +before explicitly adding a watchdog-fence-device. +--- + configure.ac | 1 + + daemons/controld/controld_control.c | 2 +- + daemons/controld/controld_fencing.c | 14 ++ + daemons/controld/controld_fencing.h | 1 + + daemons/fenced/Makefile.am | 2 +- + daemons/fenced/fence_watchdog.in | 283 ++++++++++++++++++++++++++++ + daemons/fenced/fenced_commands.c | 141 +++++++++++--- + daemons/fenced/fenced_remote.c | 71 ++++--- + daemons/fenced/pacemaker-fenced.c | 131 +++++++++---- + daemons/fenced/pacemaker-fenced.h | 5 +- + include/crm/crm.h | 2 +- + include/crm/fencing/internal.h | 8 +- + lib/fencing/st_client.c | 61 ++++++ + lib/lrmd/lrmd_client.c | 6 +- + rpm/pacemaker.spec.in | 3 + + 16 files changed, 635 insertions(+), 97 deletions(-) + create mode 100755 daemons/fenced/fence_watchdog.in + +diff --git a/configure.ac b/configure.ac +index 436100c81..013562e46 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1972,6 +1972,7 @@ CONFIG_FILES_EXEC([cts/cts-cli], + [cts/support/fence_dummy], + [cts/support/pacemaker-cts-dummyd], + [daemons/fenced/fence_legacy], ++ [daemons/fenced/fence_watchdog], + [doc/abi-check], + [extra/resources/ClusterMon], + [extra/resources/HealthSMART], +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 45a70bb92..b5da6a46c 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -615,7 +615,7 @@ static pcmk__cluster_option_t crmd_opts[] = { + }, + { + "stonith-watchdog-timeout", NULL, "time", NULL, +- "0", pcmk__valid_sbd_timeout, ++ "0", controld_verify_stonith_watchdog_timeout, + "How long to wait before we can assume nodes are safely down " + "when watchdog-based self-fencing via SBD is in use", + "If nonzero, along with `have-watchdog=true` automatically set by the " +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 0fba6613b..6c2a6c550 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -886,6 +887,19 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action) + return TRUE; + } + ++bool ++controld_verify_stonith_watchdog_timeout(const char *value) ++{ ++ gboolean rv = TRUE; ++ ++ if (stonith_api && (stonith_api->state != stonith_disconnected) && ++ stonith__watchdog_fencing_enabled_for_node_api(stonith_api, ++ fsa_our_uname)) { ++ rv = pcmk__valid_sbd_timeout(value); ++ } ++ return rv; ++} ++ + /* end stonith API client functions */ + + +diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h +index d0ecc8234..ef68a0c83 100644 +--- a/daemons/controld/controld_fencing.h ++++ b/daemons/controld/controld_fencing.h +@@ -24,6 +24,7 @@ void update_stonith_max_attempts(const char* value); + void controld_trigger_fencer_connect(void); + void controld_disconnect_fencer(bool destroy); + gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action); ++bool controld_verify_stonith_watchdog_timeout(const char *value); + + // stonith cleanup list + void add_stonith_cleanup(const char *target); +diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am +index 43413e11d..2923d7c9b 100644 +--- a/daemons/fenced/Makefile.am ++++ b/daemons/fenced/Makefile.am +@@ -15,7 +15,7 @@ halibdir = $(CRM_DAEMON_DIR) + + halib_PROGRAMS = pacemaker-fenced cts-fence-helper + +-sbin_SCRIPTS = fence_legacy ++sbin_SCRIPTS = fence_legacy fence_watchdog + + noinst_HEADERS = pacemaker-fenced.h + +diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in +new file mode 100755 +index 000000000..c83304f1d +--- /dev/null ++++ b/daemons/fenced/fence_watchdog.in +@@ -0,0 +1,283 @@ ++#!@PYTHON@ ++"""Dummy watchdog fence agent for providing meta-data for the pacemaker internal agent ++""" ++ ++__copyright__ = "Copyright 2012-2021 the Pacemaker project contributors" ++__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" ++ ++import io ++import os ++import re ++import sys ++import atexit ++import getopt ++ ++SHORT_DESC = "Dummy watchdog fence agent" ++LONG_DESC = """fence_watchdog just provides ++meta-data - actual fencing is done by the pacemaker internal watchdog agent.""" ++ ++ALL_OPT = { ++ "version" : { ++ "getopt" : "V", ++ "longopt" : "version", ++ "help" : "-V, --version Display version information and exit", ++ "required" : "0", ++ "shortdesc" : "Display version information and exit", ++ "order" : 53 ++ }, ++ "help" : { ++ "getopt" : "h", ++ "longopt" : "help", ++ "help" : "-h, --help Display this help and exit", ++ "required" : "0", ++ "shortdesc" : "Display help and exit", ++ "order" : 54 ++ }, ++ "action" : { ++ "getopt" : "o:", ++ "longopt" : "action", ++ "help" : "-o, --action=[action] Action: metadata", ++ "required" : "1", ++ "shortdesc" : "Fencing Action", ++ "default" : "metadata", ++ "order" : 1 ++ }, ++ "nodename" : { ++ "getopt" : "N:", ++ "longopt" : "nodename", ++ "help" : "-N, --nodename Node name of fence victim (ignored)", ++ "required" : "0", ++ "shortdesc" : "Ignored", ++ "order" : 2 ++ }, ++ "plug" : { ++ "getopt" : "n:", ++ "longopt" : "plug", ++ "help" : "-n, --plug=[id] Physical plug number on device (ignored)", ++ "required" : "1", ++ "shortdesc" : "Ignored", ++ "order" : 4 ++ } ++} ++ ++ ++def agent(): ++ """ Return name this file was run as. """ ++ ++ return os.path.basename(sys.argv[0]) ++ ++ ++def fail_usage(message): ++ """ Print a usage message and exit. """ ++ ++ sys.exit("%s\nPlease use '-h' for usage" % message) ++ ++ ++def show_docs(options): ++ """ Handle informational options (display info and exit). """ ++ ++ device_opt = options["device_opt"] ++ ++ if "-h" in options: ++ usage(device_opt) ++ sys.exit(0) ++ ++ if "-o" in options and options["-o"].lower() == "metadata": ++ metadata(device_opt, options) ++ sys.exit(0) ++ ++ if "-V" in options: ++ print(AGENT_VERSION) ++ sys.exit(0) ++ ++ ++def sorted_options(avail_opt): ++ """ Return a list of all options, in their internally specified order. """ ++ ++ sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] ++ sorted_list.sort(key=lambda x: x[1]["order"]) ++ return sorted_list ++ ++ ++def usage(avail_opt): ++ """ Print a usage message. """ ++ print(LONG_DESC) ++ print() ++ print("Usage:") ++ print("\t" + agent() + " [options]") ++ print("Options:") ++ ++ for dummy, value in sorted_options(avail_opt): ++ if len(value["help"]) != 0: ++ print(" " + value["help"]) ++ ++ ++def metadata(avail_opt, options): ++ """ Print agent metadata. """ ++ ++ print(""" ++ ++%s ++""" % (agent(), SHORT_DESC, LONG_DESC)) ++ ++ for option, dummy in sorted_options(avail_opt): ++ if "shortdesc" in ALL_OPT[option]: ++ print(' ') ++ ++ default = "" ++ default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1] ++ default_name_no_arg = "-" + ALL_OPT[option]["getopt"] ++ ++ if "default" in ALL_OPT[option]: ++ default = 'default="%s"' % str(ALL_OPT[option]["default"]) ++ elif default_name_arg in options: ++ if options[default_name_arg]: ++ try: ++ default = 'default="%s"' % options[default_name_arg] ++ except TypeError: ++ ## @todo/@note: Currently there is no clean way how to handle lists ++ ## we can create a string from it but we can't set it on command line ++ default = 'default="%s"' % str(options[default_name_arg]) ++ elif default_name_no_arg in options: ++ default = 'default="true"' ++ ++ mixed = ALL_OPT[option]["help"] ++ ## split it between option and help text ++ res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) ++ if None != res: ++ mixed = res.group(1) ++ mixed = mixed.replace("<", "<").replace(">", ">") ++ print(' ') ++ ++ if ALL_OPT[option]["getopt"].count(":") > 0: ++ print(' ') ++ else: ++ print(' ') ++ ++ print(' ' + ALL_OPT[option]["shortdesc"] + '') ++ print(' ') ++ ++ print(' \n ') ++ print(' ') ++ print(' ') ++ print(' ') ++ print(' ') ++ print(' ') ++ print(' ') ++ print(' ') ++ print('') ++ ++ ++def option_longopt(option): ++ """ Return the getopt-compatible long-option name of the given option. """ ++ ++ if ALL_OPT[option]["getopt"].endswith(":"): ++ return ALL_OPT[option]["longopt"] + "=" ++ else: ++ return ALL_OPT[option]["longopt"] ++ ++ ++def opts_from_command_line(argv, avail_opt): ++ """ Read options from command-line arguments. """ ++ ++ # Prepare list of options for getopt ++ getopt_string = "" ++ longopt_list = [] ++ for k in avail_opt: ++ if k in ALL_OPT: ++ getopt_string += ALL_OPT[k]["getopt"] ++ else: ++ fail_usage("Parse error: unknown option '" + k + "'") ++ ++ if k in ALL_OPT and "longopt" in ALL_OPT[k]: ++ longopt_list.append(option_longopt(k)) ++ ++ try: ++ opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list) ++ except getopt.GetoptError as error: ++ fail_usage("Parse error: " + error.msg) ++ ++ # Transform longopt to short one which are used in fencing agents ++ old_opt = opt ++ opt = {} ++ for old_option in dict(old_opt).keys(): ++ if old_option.startswith("--"): ++ for option in ALL_OPT.keys(): ++ if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option: ++ opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option] ++ else: ++ opt[old_option] = dict(old_opt)[old_option] ++ ++ return opt ++ ++ ++def opts_from_stdin(avail_opt): ++ """ Read options from standard input. """ ++ ++ opt = {} ++ name = "" ++ for line in sys.stdin.readlines(): ++ line = line.strip() ++ if line.startswith("#") or (len(line) == 0): ++ continue ++ ++ (name, value) = (line + "=").split("=", 1) ++ value = value[:-1] ++ ++ if name not in avail_opt: ++ print("Parse error: Ignoring unknown option '%s'" % line, ++ file=sys.stderr) ++ continue ++ ++ if ALL_OPT[name]["getopt"].endswith(":"): ++ opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value ++ elif value.lower() in ["1", "yes", "on", "true"]: ++ opt["-"+ALL_OPT[name]["getopt"]] = "1" ++ ++ return opt ++ ++ ++def process_input(avail_opt): ++ """ Set standard environment variables, and parse all options. """ ++ ++ # Set standard environment ++ os.putenv("LANG", "C") ++ os.putenv("LC_ALL", "C") ++ ++ # Read options from command line or standard input ++ if len(sys.argv) > 1: ++ return opts_from_command_line(sys.argv[1:], avail_opt) ++ else: ++ return opts_from_stdin(avail_opt) ++ ++ ++def atexit_handler(): ++ """ Close stdout on exit. """ ++ ++ try: ++ sys.stdout.close() ++ os.close(1) ++ except IOError: ++ sys.exit("%s failed to close standard output" % agent()) ++ ++ ++def main(): ++ """ Make it so! """ ++ ++ device_opt = ALL_OPT.keys() ++ ++ ## Defaults for fence agent ++ atexit.register(atexit_handler) ++ options = process_input(device_opt) ++ options["device_opt"] = device_opt ++ show_docs(options) ++ ++ print("Watchdog fencing may be initiated only by the cluster, not this agent.", ++ file=sys.stderr) ++ ++ sys.exit(1) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index cd9968f1a..9470ea2c1 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -397,15 +397,13 @@ stonith_device_execute(stonith_device_t * device) + return TRUE; + } + +- if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) { +- if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) { +- pcmk__panic(__func__); +- goto done; +- +- } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) { +- pcmk__panic(__func__); +- goto done; +- ++ if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, ++ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { ++ if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) { ++ if (node_does_watchdog_fencing(stonith_our_uname)) { ++ pcmk__panic(__func__); ++ goto done; ++ } + } else { + crm_info("Faking success for %s watchdog operation", cmd->action); + cmd->done_cb(0, 0, NULL, cmd); +@@ -716,7 +714,7 @@ get_agent_metadata(const char *agent, xmlNode ** metadata) + return EINVAL; + } + *metadata = NULL; +- if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { ++ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { + return pcmk_rc_ok; + } + init_metadata_cache(); +@@ -1050,24 +1048,6 @@ schedule_internal_command(const char *origin, + schedule_stonith_command(cmd, device); + } + +-gboolean +-string_in_list(GList *list, const char *item) +-{ +- int lpc = 0; +- int max = g_list_length(list); +- +- for (lpc = 0; lpc < max; lpc++) { +- const char *value = g_list_nth_data(list, lpc); +- +- if (pcmk__str_eq(item, value, pcmk__str_casei)) { +- return TRUE; +- } else { +- crm_trace("%d: '%s' != '%s'", lpc, item, value); +- } +- } +- return FALSE; +-} +- + static void + status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) + { +@@ -1144,7 +1124,7 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) + if (!alias) { + alias = search->host; + } +- if (string_in_list(dev->targets, alias)) { ++ if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) { + can_fence = TRUE; + } + } +@@ -1215,9 +1195,62 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) + stonith_device_t *dup = NULL; + stonith_device_t *device = build_device_from_xml(msg); + guint ndevices = 0; ++ int rv = pcmk_ok; + + CRM_CHECK(device != NULL, return -ENOMEM); + ++ /* do we have a watchdog-device? */ ++ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || ++ pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, ++ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { ++ if (stonith_watchdog_timeout_ms <= 0) { ++ crm_err("Ignoring watchdog fence device without " ++ "stonith-watchdog-timeout set."); ++ rv = -ENODEV; ++ /* fall through to cleanup & return */ ++ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, ++ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { ++ crm_err("Ignoring watchdog fence device with unknown " ++ "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", ++ device->agent?device->agent:""); ++ rv = -ENODEV; ++ /* fall through to cleanup & return */ ++ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, ++ pcmk__str_none)) { ++ crm_err("Ignoring watchdog fence device " ++ "named %s !='"STONITH_WATCHDOG_ID"'.", ++ device->id?device->id:""); ++ rv = -ENODEV; ++ /* fall through to cleanup & return */ ++ } else { ++ if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, ++ pcmk__str_none)) { ++ /* this either has an empty list or the targets ++ configured for watchdog-fencing ++ */ ++ g_list_free_full(stonith_watchdog_targets, free); ++ stonith_watchdog_targets = device->targets; ++ device->targets = NULL; ++ } ++ if (node_does_watchdog_fencing(stonith_our_uname)) { ++ g_list_free_full(device->targets, free); ++ device->targets = stonith__parse_targets(stonith_our_uname); ++ g_hash_table_replace(device->params, ++ strdup(PCMK_STONITH_HOST_LIST), ++ strdup(stonith_our_uname)); ++ /* proceed as with any other stonith-device */ ++ break; ++ } ++ ++ crm_debug("Skip registration of watchdog fence device on node not in host-list."); ++ /* cleanup and fall through to more cleanup and return */ ++ device->targets = NULL; ++ stonith_device_remove(device->id, from_cib); ++ } ++ free_device(device); ++ return rv; ++ } while (0); ++ + dup = device_has_duplicate(device); + if (dup) { + ndevices = g_hash_table_size(device_list); +@@ -1598,6 +1631,39 @@ stonith_level_remove(xmlNode *msg, char **desc) + * (CIB registration is not sufficient), because monitor should not be + * possible unless the device is "started" (API registered). + */ ++ ++static char * ++list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) ++{ ++ int max = g_list_length(list); ++ size_t delim_len = delim?strlen(delim):0; ++ size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); ++ char *rv; ++ GList *gIter; ++ ++ for (gIter = list; gIter != NULL; gIter = gIter->next) { ++ const char *value = (const char *) gIter->data; ++ ++ alloc_size += strlen(value); ++ } ++ rv = calloc(alloc_size, sizeof(char)); ++ if (rv) { ++ char *pos = rv; ++ const char *lead_delim = ""; ++ ++ for (gIter = list; gIter != NULL; gIter = gIter->next) { ++ const char *value = (const char *) gIter->data; ++ ++ pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; ++ lead_delim = delim; ++ } ++ if (max && terminate_with_delim) { ++ sprintf(pos, "%s", delim); ++ } ++ } ++ return rv; ++} ++ + static int + stonith_device_action(xmlNode * msg, char **output) + { +@@ -1615,6 +1681,19 @@ stonith_device_action(xmlNode * msg, char **output) + return -EPROTO; + } + ++ if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { ++ if (stonith_watchdog_timeout_ms <= 0) { ++ return -ENODEV; ++ } else { ++ if (pcmk__str_eq(action, "list", pcmk__str_casei)) { ++ *output = list_to_string(stonith_watchdog_targets, "\n", TRUE); ++ return pcmk_ok; ++ } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { ++ return pcmk_ok; ++ } ++ } ++ } ++ + device = g_hash_table_lookup(device_list, id); + if ((device == NULL) + || (!device->api_registered && !strcmp(action, "monitor"))) { +@@ -1742,7 +1821,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc + * Only use if all hosts on which the device can be active can always fence all listed hosts + */ + +- if (string_in_list(dev->targets, host)) { ++ if (pcmk__str_in_list(dev->targets, host, pcmk__str_casei)) { + can = TRUE; + } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) + && g_hash_table_lookup(dev->aliases, host)) { +@@ -1763,7 +1842,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc + return; + } + +- if (string_in_list(dev->targets, alias)) { ++ if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) { + can = TRUE; + } + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index cf91acaed..224f2baba 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1522,6 +1522,25 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + } + } + ++static gboolean ++check_watchdog_fencing_and_wait(remote_fencing_op_t * op) ++{ ++ if (node_does_watchdog_fencing(op->target)) { ++ ++ crm_notice("Waiting %lds for %s to self-fence (%s) for " ++ "client %s " CRM_XS " id=%.8s", ++ (stonith_watchdog_timeout_ms / 1000), ++ op->target, op->action, op->client_name, op->id); ++ op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, ++ remote_op_watchdog_done, op); ++ return TRUE; ++ } else { ++ crm_debug("Skipping fallback to watchdog-fencing as %s is " ++ "not in host-list", op->target); ++ } ++ return FALSE; ++} ++ + void + call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) + { +@@ -1592,26 +1611,33 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) + g_source_remove(op->op_timer_one); + } + +- if(stonith_watchdog_timeout_ms > 0 && device && pcmk__str_eq(device, "watchdog", pcmk__str_casei)) { +- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s " +- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), +- op->target, op->action, op->client_name, op->id); +- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); +- +- /* TODO check devices to verify watchdog will be in use */ +- } else if(stonith_watchdog_timeout_ms > 0 +- && pcmk__str_eq(peer->host, op->target, pcmk__str_casei) +- && !pcmk__str_eq(op->action, "on", pcmk__str_casei)) { +- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s " +- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), +- op->target, op->action, op->client_name, op->id); +- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); +- +- } else { ++ if (!(stonith_watchdog_timeout_ms > 0 && ( ++ (pcmk__str_eq(device, STONITH_WATCHDOG_ID, ++ pcmk__str_none)) || ++ (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) ++ && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) && ++ check_watchdog_fencing_and_wait(op))) { ++ ++ /* Some thoughts about self-fencing cases reaching this point: ++ - Actually check in check_watchdog_fencing_and_wait ++ shouldn't fail if STONITH_WATCHDOG_ID is ++ chosen as fencing-device and it being present implies ++ watchdog-fencing is enabled anyway ++ - If watchdog-fencing is disabled either in general or for ++ a specific target - detected in check_watchdog_fencing_and_wait - ++ for some other kind of self-fencing we can't expect ++ a success answer but timeout is fine if the node doesn't ++ come back in between ++ - Delicate might be the case where we have watchdog-fencing ++ enabled for a node but the watchdog-fencing-device isn't ++ explicitly chosen for suicide. Local pe-execution in sbd ++ may detect the node as unclean and lead to timely suicide. ++ Otherwise the selection of stonith-watchdog-timeout at ++ least is questionable. ++ */ + op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); + } + +- + send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); +@@ -1645,12 +1671,11 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) + * but we have all the expected replies, then no devices + * are available to execute the fencing operation. */ + +- if(stonith_watchdog_timeout_ms && pcmk__str_eq(device, "watchdog", pcmk__str_null_matches | pcmk__str_casei)) { +- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s " +- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), +- op->target, op->action, op->client_name, op->id); +- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); +- return; ++ if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device, ++ STONITH_WATCHDOG_ID, pcmk__str_null_matches)) { ++ if (check_watchdog_fencing_and_wait(op)) { ++ return; ++ } + } + + if (op->state == st_query) { +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index 39738d8be..7f8b427d9 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -42,6 +42,7 @@ + + char *stonith_our_uname = NULL; + long stonith_watchdog_timeout_ms = 0; ++GList *stonith_watchdog_targets = NULL; + + static GMainLoop *mainloop = NULL; + +@@ -578,7 +579,44 @@ our_node_allowed_for(pe_resource_t *rsc) + } + + static void +-watchdog_device_update(xmlNode *cib) ++watchdog_device_update(void) ++{ ++ if (stonith_watchdog_timeout_ms > 0) { ++ if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && ++ !stonith_watchdog_targets) { ++ /* getting here watchdog-fencing enabled, no device there yet ++ and reason isn't stonith_watchdog_targets preventing that ++ */ ++ int rc; ++ xmlNode *xml; ++ ++ xml = create_device_registration_xml( ++ STONITH_WATCHDOG_ID, ++ st_namespace_internal, ++ STONITH_WATCHDOG_AGENT, ++ NULL, /* stonith_device_register will add our ++ own name as PCMK_STONITH_HOST_LIST param ++ so we can skip that here ++ */ ++ NULL); ++ rc = stonith_device_register(xml, NULL, TRUE); ++ free_xml(xml); ++ if (rc != pcmk_ok) { ++ crm_crit("Cannot register watchdog pseudo fence agent"); ++ crm_exit(CRM_EX_FATAL); ++ } ++ } ++ ++ } else { ++ /* be silent if no device - todo parameter to stonith_device_remove */ ++ if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { ++ stonith_device_remove(STONITH_WATCHDOG_ID, TRUE); ++ } ++ } ++} ++ ++static void ++update_stonith_watchdog_timeout_ms(xmlNode *cib) + { + xmlNode *stonith_enabled_xml = NULL; + const char *stonith_enabled_s = NULL; +@@ -608,33 +646,7 @@ watchdog_device_update(xmlNode *cib) + } + } + +- if (timeout_ms != stonith_watchdog_timeout_ms) { +- crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); +- stonith_watchdog_timeout_ms = timeout_ms; +- +- if (stonith_watchdog_timeout_ms > 0) { +- int rc; +- xmlNode *xml; +- stonith_key_value_t *params = NULL; +- +- params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST, +- stonith_our_uname); +- +- xml = create_device_registration_xml("watchdog", st_namespace_internal, +- STONITH_WATCHDOG_AGENT, params, +- NULL); +- stonith_key_value_freeall(params, 1, 1); +- rc = stonith_device_register(xml, NULL, FALSE); +- free_xml(xml); +- if (rc != pcmk_ok) { +- crm_crit("Cannot register watchdog pseudo fence agent"); +- crm_exit(CRM_EX_FATAL); +- } +- +- } else { +- stonith_device_remove("watchdog", FALSE); +- } +- } ++ stonith_watchdog_timeout_ms = timeout_ms; + } + + /*! +@@ -677,6 +689,16 @@ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) + return; + } + ++ /* if watchdog-fencing is disabled handle any watchdog-fence ++ resource as if it was disabled ++ */ ++ if ((stonith_watchdog_timeout_ms <= 0) && ++ pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { ++ crm_info("Watchdog-fencing disabled thus handling " ++ "device %s as disabled", rsc->id); ++ return; ++ } ++ + /* Check whether our node is allowed for this resource (and its parent if in a group) */ + node = our_node_allowed_for(rsc); + if (rsc->parent && (rsc->parent->variant == pe_group)) { +@@ -772,6 +794,12 @@ cib_devices_update(void) + } + } + ++ /* have list repopulated if cib has a watchdog-fencing-resource ++ TODO: keep a cached list for queries happening while we are refreshing ++ */ ++ g_list_free_full(stonith_watchdog_targets, free); ++ stonith_watchdog_targets = NULL; ++ + for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) { + cib_device_update(gIter->data, fenced_data_set); + } +@@ -825,6 +853,8 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg) + if (search != NULL) { + *search = 0; + stonith_device_remove(rsc_id, TRUE); ++ /* watchdog_device_update called afterwards ++ to fall back to implicit definition if needed */ + } else { + crm_warn("Ignoring malformed CIB update (resource deletion)"); + } +@@ -968,6 +998,24 @@ node_has_attr(const char *node, const char *name, const char *value) + return (match != NULL); + } + ++/*! ++ * \internal ++ * \brief Check whether a node does watchdog-fencing ++ * ++ * \param[in] node Name of node to check ++ * ++ * \return TRUE if node found in stonith_watchdog_targets ++ * or stonith_watchdog_targets is empty indicating ++ * all nodes are doing watchdog-fencing ++ */ ++gboolean ++node_does_watchdog_fencing(const char *node) ++{ ++ return ((stonith_watchdog_targets == NULL) || ++ pcmk__str_in_list(stonith_watchdog_targets, node, pcmk__str_casei)); ++} ++ ++ + static void + update_fencing_topology(const char *event, xmlNode * msg) + { +@@ -1073,6 +1121,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg) + xmlNode *stonith_enabled_xml = NULL; + const char *stonith_enabled_s = NULL; + static gboolean stonith_enabled_saved = TRUE; ++ long timeout_ms_saved = stonith_watchdog_timeout_ms; ++ gboolean need_full_refresh = FALSE; + + if(!have_cib_devices) { + crm_trace("Skipping updates until we get a full dump"); +@@ -1127,6 +1177,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg) + } + + pcmk__refresh_node_caches_from_cib(local_cib); ++ update_stonith_watchdog_timeout_ms(local_cib); + + stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", + local_cib, LOG_NEVER); +@@ -1134,23 +1185,30 @@ update_cib_cache_cb(const char *event, xmlNode * msg) + stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); + } + +- watchdog_device_update(local_cib); +- + if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { + crm_trace("Ignoring CIB updates while fencing is disabled"); + stonith_enabled_saved = FALSE; +- return; + + } else if (stonith_enabled_saved == FALSE) { + crm_info("Updating fencing device and topology lists " + "now that fencing is enabled"); + stonith_enabled_saved = TRUE; +- fencing_topology_init(); +- cib_devices_update(); ++ need_full_refresh = TRUE; + + } else { +- update_fencing_topology(event, msg); +- update_cib_stonith_devices(event, msg); ++ if (timeout_ms_saved != stonith_watchdog_timeout_ms) { ++ need_full_refresh = TRUE; ++ } else { ++ update_fencing_topology(event, msg); ++ update_cib_stonith_devices(event, msg); ++ watchdog_device_update(); ++ } ++ } ++ ++ if (need_full_refresh) { ++ fencing_topology_init(); ++ cib_devices_update(); ++ watchdog_device_update(); + } + } + +@@ -1162,10 +1220,11 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us + local_cib = copy_xml(output); + + pcmk__refresh_node_caches_from_cib(local_cib); ++ update_stonith_watchdog_timeout_ms(local_cib); + + fencing_topology_init(); +- watchdog_device_update(local_cib); + cib_devices_update(); ++ watchdog_device_update(); + } + + static void +diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index d330fda4d..14e085e98 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -260,14 +260,15 @@ bool fencing_peer_active(crm_node_t *peer); + + int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); + +-gboolean string_in_list(GList *list, const char *item); +- + gboolean node_has_attr(const char *node, const char *name, const char *value); + ++gboolean node_does_watchdog_fencing(const char *node); ++ + extern char *stonith_our_uname; + extern gboolean stand_alone; + extern GHashTable *device_list; + extern GHashTable *topology; + extern long stonith_watchdog_timeout_ms; ++extern GList *stonith_watchdog_targets; + + extern GHashTable *stonith_remote_op_list; +diff --git a/include/crm/crm.h b/include/crm/crm.h +index ee52c3630..7861c160e 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -66,7 +66,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.10.2" ++# define CRM_FEATURE_SET "3.11.0" + + /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and + * recipient of a CPG message. This imposes an arbitrary limit on cluster node +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 8bcb544d8..f222edba3 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -164,7 +164,10 @@ void stonith__device_parameter_flags(uint32_t *device_flags, + # define STONITH_OP_LEVEL_ADD "st_level_add" + # define STONITH_OP_LEVEL_DEL "st_level_remove" + +-# define STONITH_WATCHDOG_AGENT "#watchdog" ++# define STONITH_WATCHDOG_AGENT "fence_watchdog" ++/* Don't change 2 below as it would break rolling upgrade */ ++# define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog" ++# define STONITH_WATCHDOG_ID "watchdog" + + # ifdef HAVE_STONITH_STONITH_H + // utilities from st_lha.c +@@ -211,4 +214,7 @@ stonith__op_state_pending(enum op_state state) + return state != st_failed && state != st_done; + } + ++gboolean stonith__watchdog_fencing_enabled_for_node(const char *node); ++gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node); ++ + #endif +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index e285f51e2..0ff98157b 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -195,6 +195,67 @@ stonith_get_namespace(const char *agent, const char *namespace_s) + return st_namespace_invalid; + } + ++gboolean ++stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) ++{ ++ gboolean rv = FALSE; ++ stonith_t *stonith_api = st?st:stonith_api_new(); ++ char *list = NULL; ++ ++ if(stonith_api) { ++ if (stonith_api->state == stonith_disconnected) { ++ int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); ++ ++ if (rc != pcmk_ok) { ++ crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); ++ } ++ } ++ ++ if (stonith_api->state != stonith_disconnected) { ++ /* caveat!!! ++ * this might fail when when stonithd is just updating the device-list ++ * probably something we should fix as well for other api-calls */ ++ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); ++ if ((rc != pcmk_ok) || (list == NULL)) { ++ /* due to the race described above it can happen that ++ * we drop in here - so as not to make remote nodes ++ * panic on that answer ++ */ ++ crm_warn("watchdog-fencing-query failed"); ++ } else if (list[0] == '\0') { ++ crm_warn("watchdog-fencing-query returned an empty list - any node"); ++ rv = TRUE; ++ } else { ++ GList *targets = stonith__parse_targets(list); ++ rv = pcmk__str_in_list(targets, node, pcmk__str_casei); ++ g_list_free_full(targets, free); ++ } ++ free(list); ++ if (!st) { ++ /* if we're provided the api we still might have done the ++ * connection - but let's assume the caller won't bother ++ */ ++ stonith_api->cmds->disconnect(stonith_api); ++ } ++ } ++ ++ if (!st) { ++ stonith_api_delete(stonith_api); ++ } ++ } else { ++ crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); ++ } ++ crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", ++ node, rv?"":"not "); ++ return rv; ++} ++ ++gboolean ++stonith__watchdog_fencing_enabled_for_node(const char *node) ++{ ++ return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); ++} ++ + static void + log_action(stonith_action_t *action, pid_t pid) + { +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 87d050ed1..bf4bceb42 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -34,6 +34,7 @@ + #include + + #include ++#include + + #ifdef HAVE_GNUTLS_GNUTLS_H + # undef KEYFILE +@@ -934,7 +935,10 @@ lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) + crm_xml_add(data, F_LRMD_ORIGIN, __func__); + + value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); +- crm_xml_add(data, F_LRMD_WATCHDOG, value); ++ if ((value) && ++ (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { ++ crm_xml_add(data, F_LRMD_WATCHDOG, value); ++ } + + rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, + (native->type == pcmk__client_ipc)); +diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in +index 79e78ede9..f58357a77 100644 +--- a/rpm/pacemaker.spec.in ++++ b/rpm/pacemaker.spec.in +@@ -744,6 +744,7 @@ exit 0 + %doc %{_mandir}/man8/crm_attribute.* + %doc %{_mandir}/man8/crm_master.* + %doc %{_mandir}/man8/fence_legacy.* ++%doc %{_mandir}/man8/fence_watchdog.* + %doc %{_mandir}/man8/pacemakerd.* + + %doc %{_datadir}/pacemaker/alerts +@@ -796,6 +797,7 @@ exit 0 + %{_sbindir}/crm_simulate + %{_sbindir}/crm_report + %{_sbindir}/crm_ticket ++%{_sbindir}/fence_watchdog + %{_sbindir}/stonith_admin + # "dirname" is owned by -schemas, which is a prerequisite + %{_datadir}/pacemaker/report.collector +@@ -822,6 +824,7 @@ exit 0 + %exclude %{_mandir}/man8/crm_attribute.* + %exclude %{_mandir}/man8/crm_master.* + %exclude %{_mandir}/man8/fence_legacy.* ++%exclude %{_mandir}/man8/fence_watchdog.* + %exclude %{_mandir}/man8/pacemakerd.* + %exclude %{_mandir}/man8/pacemaker-remoted.* + +-- +2.27.0 + + +From 53dd360f096e5f005e3221e8d44d82d3654b5172 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 4 Aug 2021 15:57:23 +0200 +Subject: [PATCH 3/3] Fix: watchdog-fencing: Silence warning without node + restriction + +--- + lib/fencing/st_client.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 0ff98157b..14fa7b2a6 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -223,7 +223,6 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) + */ + crm_warn("watchdog-fencing-query failed"); + } else if (list[0] == '\0') { +- crm_warn("watchdog-fencing-query returned an empty list - any node"); + rv = TRUE; + } else { + GList *targets = stonith__parse_targets(list); +-- +2.27.0 + diff --git a/SOURCES/016-cts.patch b/SOURCES/016-cts.patch new file mode 100644 index 0000000..195afc3 --- /dev/null +++ b/SOURCES/016-cts.patch @@ -0,0 +1,59 @@ +From b37391fef92548f31822f9df2a9b5fa2a61b4514 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 23 Jun 2021 15:17:54 -0500 +Subject: [PATCH] Fix: CTS: handle longer Corosync token timeouts + +Previously, startall() would call cluster_stable() immediately after detecting +the "controller successfully started" message. If the Corosync token timeout is +small enough, this will be fine. However with a token timeout of more than +about 1 second, the controllers will not have formed a membership by this +point, causing cluster_stable() to think there are multiple partitions, and +wait for a DC to be elected in each one, when really they will unite into a +single partition in a short time, and only elect a single DC. + +Now, startall() waits until seeing that each node is a cluster member before +calling cluster_stable(). +--- + cts/lab/CTS.py.in | 3 ++- + cts/lab/patterns.py | 2 ++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/cts/lab/CTS.py.in b/cts/lab/CTS.py.in +index abcb9d285..d9924437b 100644 +--- a/cts/lab/CTS.py.in ++++ b/cts/lab/CTS.py.in +@@ -628,9 +628,10 @@ class ClusterManager(UserDict): + watchpats = [ ] + watchpats.append(self.templates["Pat:DC_IDLE"]) + for node in nodelist: +- watchpats.append(self.templates["Pat:Local_started"] % node) + watchpats.append(self.templates["Pat:InfraUp"] % node) + watchpats.append(self.templates["Pat:PacemakerUp"] % node) ++ watchpats.append(self.templates["Pat:Local_started"] % node) ++ watchpats.append(self.templates["Pat:They_up"] % (nodelist[0], node)) + + # Start all the nodes - at about the same time... + watch = LogWatcher(self.Env["LogFileName"], watchpats, "fast-start", self.Env["DeadTime"]+10, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"]) +diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py +index e21a016ff..400fd3dc8 100644 +--- a/cts/lab/patterns.py ++++ b/cts/lab/patterns.py +@@ -61,6 +61,7 @@ class BasePatterns(object): + "Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN", + "Pat:They_stopped" : "%s\W.*LOST:.* %s ", + "Pat:They_dead" : "node %s.*: is dead", ++ "Pat:They_up" : "%s %s\W.*OVERRIDE THIS PATTERN", + "Pat:TransitionComplete" : "Transition status: Complete: complete", + + "Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s", +@@ -130,6 +131,7 @@ class crm_corosync(BasePatterns): + "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", + "Pat:They_stopped" : "%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", + "Pat:They_dead" : "pacemaker-controld.*Node %s(\[|\s).*state is now lost", ++ "Pat:They_up" : "\W%s\W.*pacemaker-controld.*Node %s state is now member", + + "Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(", + # "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes() +-- +2.27.0 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 41f71c5..b0d0723 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -36,14 +36,18 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.0 -%global specversion 4 +%global specversion 5 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit 7c3f660707a495a1331716ad32cd3ac9d9f8ff58 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. +%if (0%{?fedora} >= 26) || (0%{?rhel} >= 9) +%global commit_abbrev 9 +%else %global commit_abbrev 7 +%endif ## Nagios source control identifiers %global nagios_name nagios-agents-metadata @@ -146,14 +150,6 @@ %define gnutls_priorities %{?pcmk_gnutls_priorities}%{!?pcmk_gnutls_priorities:@SYSTEM} %endif -%if !%{defined _rundir} -%if 0%{?fedora} >= 15 || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1200 -%define _rundir /run -%else -%define _rundir /var/run -%endif -%endif - %if 0%{?fedora} > 22 || 0%{?rhel} > 7 %global supports_recommends 1 %endif @@ -280,6 +276,9 @@ Patch10: 010-probe-pending.patch Patch11: 011-crm_attribute-regression.patch Patch12: 012-string-arguments.patch Patch13: 013-leaks.patch +Patch14: 014-str-list.patch +Patch15: 015-sbd.patch +Patch16: 016-cts.patch # downstream-only commits #Patch1xx: 1xx-xxxx.patch @@ -310,18 +309,34 @@ Requires: libqb >= 0.17.0 BuildRequires: libqb-devel >= 0.17.0 # Required basic build tools -BuildRequires: coreutils findutils grep sed -BuildRequires: autoconf automake gcc make pkgconfig -BuildRequires: libtool %{?pkgname_libtool_devel} +BuildRequires: autoconf +BuildRequires: automake +BuildRequires: coreutils +BuildRequires: findutils +BuildRequires: gcc +BuildRequires: grep +BuildRequires: libtool +%if %{defined pkgname_libtool_devel} +BuildRequires: %{?pkgname_libtool_devel} +%endif +BuildRequires: make +BuildRequires: pkgconfig +BuildRequires: sed # Required for core functionality BuildRequires: pkgconfig(glib-2.0) >= 2.42 -BuildRequires: libxml2-devel libxslt-devel libuuid-devel +BuildRequires: libxml2-devel +BuildRequires: libxslt-devel +BuildRequires: libuuid-devel BuildRequires: %{pkgname_bzip2_devel} # Enables optional functionality -BuildRequires: ncurses-devel %{pkgname_docbook_xsl} -BuildRequires: help2man %{pkgname_gnutls_devel} pam-devel pkgconfig(dbus-1) +BuildRequires: pkgconfig(dbus-1) +BuildRequires: %{pkgname_docbook_xsl} +BuildRequires: %{pkgname_gnutls_devel} +BuildRequires: help2man +BuildRequires: ncurses-devel +BuildRequires: pam-devel %if %{systemd_native} BuildRequires: pkgconfig(systemd) @@ -338,7 +353,9 @@ BuildRequires: %{pkgname_glue_libs}-devel %endif %if %{with doc} -BuildRequires: asciidoc inkscape %{python_name}-sphinx +BuildRequires: asciidoc +BuildRequires: inkscape +BuildRequires: %{python_name}-sphinx %endif Provides: pcmk-cluster-manager = %{version}-%{release} @@ -368,12 +385,9 @@ Available rpmbuild rebuild options: License: GPLv2+ and LGPLv2+ Summary: Command line tools for controlling Pacemaker clusters Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} -%if 0%{?supports_recommends} -#Recommends: pcmk-cluster-manager = %{version}-%{release} # For crm_report Requires: tar Requires: bzip2 -%endif Requires: perl-TimeDate Requires: %{pkgname_procps} Requires: psmisc @@ -448,11 +462,16 @@ License: GPLv2+ and LGPLv2+ Summary: Pacemaker development package Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} -Requires: %{?pkgname_libtool_devel_arch} libuuid-devel%{?_isa} -Requires: libxml2-devel%{?_isa} libxslt-devel%{?_isa} -Requires: %{pkgname_bzip2_devel}%{?_isa} glib2-devel%{?_isa} -Requires: libqb-devel%{?_isa} +Requires: %{pkgname_bzip2_devel}%{?_isa} Requires: corosync-devel >= 2.0.0 +Requires: glib2-devel%{?_isa} +Requires: libqb-devel%{?_isa} +%if %{defined pkgname_libtool_devel_arch} +Requires: %{?pkgname_libtool_devel_arch} +%endif +Requires: libuuid-devel%{?_isa} +Requires: libxml2-devel%{?_isa} +Requires: libxslt-devel%{?_isa} %description -n %{pkgname_pcmk_libs}-devel Pacemaker is an advanced, scalable High-Availability cluster resource @@ -476,7 +495,6 @@ BuildArch: noarch %if 0%{?fedora} > 22 || 0%{?rhel} > 7 Requires: %{python_name}-systemd %endif - %endif %description cts @@ -559,6 +577,7 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" %{?concurrent_fencing} \ %{?resource_stickiness} \ %{?compat20} \ + --disable-static \ --with-initdir=%{_initrddir} \ --with-runstatedir=%{_rundir} \ --localstatedir=%{_var} \ @@ -609,8 +628,7 @@ done mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/lib/rpm-state/%{name} %endif -# Don't package static libs -find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f +# Don't package libtool archives find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f # Do not package these either @@ -782,6 +800,7 @@ exit 0 %{_sbindir}/crm_attribute %{_sbindir}/crm_master +%{_sbindir}/fence_watchdog %doc %{_mandir}/man7/pacemaker-controld.* %doc %{_mandir}/man7/pacemaker-schedulerd.* @@ -790,6 +809,7 @@ exit 0 %doc %{_mandir}/man7/ocf_pacemaker_remote.* %doc %{_mandir}/man8/crm_attribute.* %doc %{_mandir}/man8/crm_master.* +%doc %{_mandir}/man8/fence_watchdog.* %doc %{_mandir}/man8/pacemakerd.* %doc %{_datadir}/pacemaker/alerts @@ -841,6 +861,7 @@ exit 0 %{_sbindir}/crm_simulate %{_sbindir}/crm_report %{_sbindir}/crm_ticket +%{_sbindir}/fence_watchdog %{_sbindir}/stonith_admin # "dirname" is owned by -schemas, which is a prerequisite %{_datadir}/pacemaker/report.collector @@ -864,6 +885,7 @@ exit 0 %doc %{_mandir}/man8/* %exclude %{_mandir}/man8/crm_attribute.* %exclude %{_mandir}/man8/crm_master.* +%exclude %{_mandir}/man8/fence_watchdog.* %exclude %{_mandir}/man8/pacemakerd.* %exclude %{_mandir}/man8/pacemaker-remoted.* @@ -955,6 +977,10 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Fri Aug 06 2021 Ken Gaillot - 2.1.0-5 +- Allow configuring specific nodes to use watchdog-only sbd for fencing +- Resolves: rhbz1443666 + * Fri Jul 30 2021 Ken Gaillot - 2.1.0-4 - Show better error messages in crm_resource with invalid resource types - Avoid selecting wrong device when dynamic-list fencing is used with host map