diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..302b56c --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +SOURCES/nagios-agents-metadata-105ab8a.tar.gz +SOURCES/pacemaker-4b1f869.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata new file mode 100644 index 0000000..1c52241 --- /dev/null +++ b/.pacemaker.metadata @@ -0,0 +1,2 @@ +ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz +dfd19e7ec7aa96520f4948fc37d48ea69835bbdb SOURCES/pacemaker-4b1f869.tar.gz diff --git a/SOURCES/001-status-deletion.patch b/SOURCES/001-status-deletion.patch new file mode 100644 index 0000000..ca35c21 --- /dev/null +++ b/SOURCES/001-status-deletion.patch @@ -0,0 +1,420 @@ +From 6c529bb624ad548f66ce6ef1fa80b77c688918f4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 22 Nov 2019 16:39:54 -0600 +Subject: [PATCH 1/4] Refactor: controller: rename struct recurring_op_s to + active_op_t + +... because it holds both recurring and pending non-recurring actions, +and the name was confusing +--- + daemons/controld/controld_execd.c | 18 +++++++++--------- + daemons/controld/controld_execd_state.c | 4 ++-- + daemons/controld/controld_lrm.h | 8 ++++---- + 3 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 9e8dd36..48f35dd 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -403,7 +403,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + GHashTableIter gIter; + const char *key = NULL; + rsc_history_t *entry = NULL; +- struct recurring_op_s *pending = NULL; ++ active_op_t *pending = NULL; + + crm_debug("Checking for active resources before exit"); + +@@ -909,7 +909,7 @@ static gboolean + lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) + { + const char *rsc = user_data; +- struct recurring_op_s *pending = value; ++ active_op_t *pending = value; + + if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { + crm_info("Removing op %s:%d for deleted resource %s", +@@ -1137,7 +1137,7 @@ cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, + { + int rc = pcmk_ok; + char *local_key = NULL; +- struct recurring_op_s *pending = NULL; ++ active_op_t *pending = NULL; + + CRM_CHECK(op != 0, return FALSE); + CRM_CHECK(rsc_id != NULL, return FALSE); +@@ -1203,7 +1203,7 @@ cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) + { + gboolean remove = FALSE; + struct cancel_data *data = user_data; +- struct recurring_op_s *op = (struct recurring_op_s *)value; ++ active_op_t *op = value; + + if (crm_str_eq(op->op_key, data->key, TRUE)) { + data->done = TRUE; +@@ -2107,7 +2107,7 @@ stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) + { + gboolean remove = FALSE; + struct stop_recurring_action_s *event = user_data; +- struct recurring_op_s *op = (struct recurring_op_s *)value; ++ active_op_t *op = value; + + if ((op->interval_ms != 0) + && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { +@@ -2124,7 +2124,7 @@ stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) + { + gboolean remove = FALSE; + lrm_state_t *lrm_state = user_data; +- struct recurring_op_s *op = (struct recurring_op_s *)value; ++ active_op_t *op = value; + + if (op->interval_ms != 0) { + crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, +@@ -2297,9 +2297,9 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const 
char *operat + * for them to complete during shutdown + */ + char *call_id_s = make_stop_id(rsc->id, call_id); +- struct recurring_op_s *pending = NULL; ++ active_op_t *pending = NULL; + +- pending = calloc(1, sizeof(struct recurring_op_s)); ++ pending = calloc(1, sizeof(active_op_t)); + crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); + + pending->call_id = call_id; +@@ -2517,7 +2517,7 @@ did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, + + void + process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, +- struct recurring_op_s *pending, xmlNode *action_xml) ++ active_op_t *pending, xmlNode *action_xml) + { + char *op_id = NULL; + char *op_key = NULL; +diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c +index 0e21d18..473da97 100644 +--- a/daemons/controld/controld_execd_state.c ++++ b/daemons/controld/controld_execd_state.c +@@ -44,7 +44,7 @@ free_deletion_op(gpointer value) + static void + free_recurring_op(gpointer value) + { +- struct recurring_op_s *op = (struct recurring_op_s *)value; ++ active_op_t *op = value; + + free(op->user_data); + free(op->rsc_id); +@@ -61,7 +61,7 @@ fail_pending_op(gpointer key, gpointer value, gpointer user_data) + { + lrmd_event_data_t event = { 0, }; + lrm_state_t *lrm_state = user_data; +- struct recurring_op_s *op = (struct recurring_op_s *)value; ++ active_op_t *op = value; + + crm_trace("Pre-emptively failing " CRM_OP_FMT " on %s (call=%s, %s)", + op->rsc_id, op->op_type, op->interval_ms, +diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h +index 598682b..27df5d7 100644 +--- a/daemons/controld/controld_lrm.h ++++ b/daemons/controld/controld_lrm.h +@@ -33,8 +33,8 @@ typedef struct resource_history_s { + + void history_free(gpointer data); + +-/* TODO - Replace this with lrmd_event_data_t */ +-struct recurring_op_s { ++// In-flight action (recurring or pending) ++typedef struct active_op_s { + guint interval_ms; + int call_id; + gboolean remove; +@@ -45,7 +45,7 @@ struct recurring_op_s { + char *op_key; + char *user_data; + GHashTable *params; +-}; ++} active_op_t; + + typedef struct lrm_state_s { + const char *node_name; +@@ -164,4 +164,4 @@ void remote_ra_process_maintenance_nodes(xmlNode *xml); + gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); + + void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, +- struct recurring_op_s *pending, xmlNode *action_xml); ++ active_op_t *pending, xmlNode *action_xml); +-- +1.8.3.1 + + +From 93a59f1df8fe11d365032d75f10cb4189ad2f1f8 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 22 Nov 2019 16:45:31 -0600 +Subject: [PATCH 2/4] Refactor: controller: convert active_op_t booleans to + bitmask + +--- + daemons/controld/controld_execd.c | 11 +++++------ + daemons/controld/controld_lrm.h | 8 ++++++-- + 2 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 48f35dd..2c9d9c0 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1148,18 +1148,17 @@ cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, + pending = g_hash_table_lookup(lrm_state->pending_ops, key); + + if (pending) { +- if (remove && pending->remove == FALSE) { +- pending->remove = TRUE; ++ if (remove && is_not_set(pending->flags, active_op_remove)) { ++ set_bit(pending->flags, active_op_remove); + crm_debug("Scheduling %s for removal", key); + } + +- 
if (pending->cancelled) { ++ if (is_set(pending->flags, active_op_cancelled)) { + crm_debug("Operation %s already cancelled", key); + free(local_key); + return FALSE; + } +- +- pending->cancelled = TRUE; ++ set_bit(pending->flags, active_op_cancelled); + + } else { + crm_info("No pending op found for %s", key); +@@ -2652,7 +2651,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + crm_err("Recurring operation %s was cancelled without transition information", + op_key); + +- } else if (pending->remove) { ++ } else if (is_set(pending->flags, active_op_remove)) { + /* This recurring operation was cancelled (by us) and pending, and we + * have been waiting for it to finish. + */ +diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h +index 27df5d7..3ab7048 100644 +--- a/daemons/controld/controld_lrm.h ++++ b/daemons/controld/controld_lrm.h +@@ -33,12 +33,16 @@ typedef struct resource_history_s { + + void history_free(gpointer data); + ++enum active_op_e { ++ active_op_remove = (1 << 0), ++ active_op_cancelled = (1 << 1), ++}; ++ + // In-flight action (recurring or pending) + typedef struct active_op_s { + guint interval_ms; + int call_id; +- gboolean remove; +- gboolean cancelled; ++ uint32_t flags; // bitmask of active_op_e + time_t start_time; + char *rsc_id; + char *op_type; +-- +1.8.3.1 + + +From 4d087d021d325e26b41a9b36b5b190dc7b25334c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 22 Nov 2019 16:58:25 -0600 +Subject: [PATCH 3/4] Refactor: controller: remove unused argument + +--- + daemons/controld/controld_execd.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 2c9d9c0..46c1958 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -43,8 +43,8 @@ static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int ca + + static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, + const char *rsc_id, const char *operation); +-static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, +- xmlNode * msg, xmlNode * request); ++static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, ++ const char *operation, xmlNode *msg); + + void send_direct_ack(const char *to_host, const char *to_sys, + lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); +@@ -1858,7 +1858,7 @@ do_lrm_invoke(long long action, + crm_rsc_delete, user_name); + + } else { +- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); ++ do_lrm_rsc_op(lrm_state, rsc, operation, input->xml); + } + + lrmd_free_rsc_info(rsc); +@@ -2170,8 +2170,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t + } + + static void +-do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, +- xmlNode * request) ++do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, ++ const char *operation, xmlNode *msg) + { + int call_id = 0; + char *op_id = NULL; +-- +1.8.3.1 + + +From 356b417274918b7da6cdd9c72c036c923160b318 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 6 Dec 2019 12:15:05 -0600 +Subject: [PATCH 4/4] Refactor: scheduler: combine two "if" statements + +... 
for readability, and ease of adding another block later +--- + lib/pacemaker/pcmk_sched_graph.c | 120 +++++++++++++++++++-------------------- + 1 file changed, 60 insertions(+), 60 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c +index e5a8a01..a6967fe 100644 +--- a/lib/pacemaker/pcmk_sched_graph.c ++++ b/lib/pacemaker/pcmk_sched_graph.c +@@ -1088,71 +1088,71 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) + return action_xml; + } + +- /* List affected resource */ +- if (action->rsc) { +- if (is_set(action->flags, pe_action_pseudo) == FALSE) { +- int lpc = 0; +- +- xmlNode *rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); +- +- const char *attr_list[] = { +- XML_AGENT_ATTR_CLASS, +- XML_AGENT_ATTR_PROVIDER, +- XML_ATTR_TYPE +- }; +- +- if (is_set(action->rsc->flags, pe_rsc_orphan) && action->rsc->clone_name) { +- /* Do not use the 'instance free' name here as that +- * might interfere with the instance we plan to keep. +- * Ie. if there are more than two named /anonymous/ +- * instances on a given node, we need to make sure the +- * command goes to the right one. +- * +- * Keep this block, even when everyone is using +- * 'instance free' anonymous clone names - it means +- * we'll do the right thing if anyone toggles the +- * unique flag to 'off' +- */ +- crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, +- action->rsc->clone_name); +- crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); +- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); ++ if (action->rsc && is_not_set(action->flags, pe_action_pseudo)) { ++ int lpc = 0; ++ xmlNode *rsc_xml = NULL; ++ const char *attr_list[] = { ++ XML_AGENT_ATTR_CLASS, ++ XML_AGENT_ATTR_PROVIDER, ++ XML_ATTR_TYPE ++ }; ++ ++ // List affected resource ++ ++ rsc_xml = create_xml_node(action_xml, ++ crm_element_name(action->rsc->xml)); ++ if (is_set(action->rsc->flags, pe_rsc_orphan) ++ && action->rsc->clone_name) { ++ /* Do not use the 'instance free' name here as that ++ * might interfere with the instance we plan to keep. ++ * Ie. if there are more than two named /anonymous/ ++ * instances on a given node, we need to make sure the ++ * command goes to the right one. ++ * ++ * Keep this block, even when everyone is using ++ * 'instance free' anonymous clone names - it means ++ * we'll do the right thing if anyone toggles the ++ * unique flag to 'off' ++ */ ++ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, ++ action->rsc->clone_name); ++ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); ++ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); + +- } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) { +- const char *xml_id = ID(action->rsc->xml); +- +- crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id, +- action->rsc->clone_name); +- +- /* ID is what we'd like client to use +- * ID_LONG is what they might know it as instead +- * +- * ID_LONG is only strictly needed /here/ during the +- * transition period until all nodes in the cluster +- * are running the new software /and/ have rebooted +- * once (meaning that they've only ever spoken to a DC +- * supporting this feature). 
+- * +- * If anyone toggles the unique flag to 'on', the +- * 'instance free' name will correspond to an orphan +- * and fall into the clause above instead +- */ +- crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); +- if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) { +- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); +- } else { +- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); +- } ++ } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) { ++ const char *xml_id = ID(action->rsc->xml); ++ ++ crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id, ++ action->rsc->clone_name); + ++ /* ID is what we'd like client to use ++ * ID_LONG is what they might know it as instead ++ * ++ * ID_LONG is only strictly needed /here/ during the ++ * transition period until all nodes in the cluster ++ * are running the new software /and/ have rebooted ++ * once (meaning that they've only ever spoken to a DC ++ * supporting this feature). ++ * ++ * If anyone toggles the unique flag to 'on', the ++ * 'instance free' name will correspond to an orphan ++ * and fall into the clause above instead ++ */ ++ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); ++ if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) { ++ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); + } else { +- CRM_ASSERT(action->rsc->clone_name == NULL); +- crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); ++ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); + } + +- for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { +- crm_xml_add(rsc_xml, attr_list[lpc], +- g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); +- } ++ } else { ++ CRM_ASSERT(action->rsc->clone_name == NULL); ++ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); ++ } ++ ++ for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { ++ crm_xml_add(rsc_xml, attr_list[lpc], ++ g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/002-status-deletion.patch b/SOURCES/002-status-deletion.patch new file mode 100644 index 0000000..1a31cdc --- /dev/null +++ b/SOURCES/002-status-deletion.patch @@ -0,0 +1,2064 @@ +From 9e4addbcb67ea8e36ba853f1e401d8a6cb6a0aa3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 20 Dec 2019 11:34:06 -0600 +Subject: [PATCH 1/8] Refactor: scheduler: reduce code duplication when + displaying resources + +Refactor native_output_string() to use GString, for readability and +maintainability. Refactor common_print() to use it, to reduce duplication and +ensure displays are consistent. + +This makes a couple small changes in how things are shown: + +* If pe_print_dev is enabled (a debugging flag not actually used by anything), + the additional resource fields are shown with the resource flags rather than + their own parenthesized list. + +* The new output model is now consistent with the legacy print model in + displaying resource flags with commas (not spaces) between them. 
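[Editor's sketch, not part of the patch itself: the following standalone program illustrates the GString flag-appending pattern this commit introduces. add_output_flag() is copied from the hunk below; the resource string and flag names are invented for the example. It shows how the helper opens the parenthesized flags list on first use and separates later flags with commas, which is what makes the new output model match the legacy print model. Compile with: gcc sketch.c $(pkg-config --cflags --libs glib-2.0).]

    #include <glib.h>
    #include <stdbool.h>
    #include <stdio.h>

    // Append a flag to resource description string's flags list
    // (same helper as introduced in the patch below)
    static bool
    add_output_flag(GString *s, const char *flag_desc, bool have_flags)
    {
        g_string_append(s, (have_flags? ", " : " ("));
        g_string_append(s, flag_desc);
        return true;
    }

    int
    main(void)
    {
        // Hypothetical resource description; real callers build this
        // from the resource's class, provider, type, and state
        GString *outstr = g_string_new("dummy\t(ocf::pacemaker:Dummy):\tStopped");
        bool have_flags = false;

        have_flags = add_output_flag(outstr, "disabled", have_flags);
        have_flags = add_output_flag(outstr, "unmanaged", have_flags);
        if (have_flags) {
            g_string_append(outstr, ")");   // close the flags list if opened
        }
        // Prints: dummy (ocf::pacemaker:Dummy): Stopped (disabled, unmanaged)
        printf("%s\n", outstr->str);
        g_string_free(outstr, TRUE);
        return 0;
    }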
+--- + include/crm/pengine/common.h | 24 +-- + lib/pengine/native.c | 410 +++++++++++++++++-------------------------- + 2 files changed, 168 insertions(+), 266 deletions(-) + +diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h +index e497f9c..48c2b66 100644 +--- a/include/crm/pengine/common.h ++++ b/include/crm/pengine/common.h +@@ -1,22 +1,12 @@ +-/* +- * Copyright 2004-2018 the Pacemaker project contributors ++/* ++ * Copyright 2004-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. +- * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ ++ + #ifndef PE_COMMON__H + # define PE_COMMON__H + +@@ -104,7 +94,7 @@ enum pe_print_options { + pe_print_html = 0x0002, + pe_print_ncurses = 0x0004, + pe_print_printf = 0x0008, +- pe_print_dev = 0x0010, ++ pe_print_dev = 0x0010, // Debugging (@COMPAT probably not useful) + pe_print_details = 0x0020, + pe_print_max_details = 0x0040, + pe_print_rsconly = 0x0080, +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index fdb98e0..8fd98bc 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -490,165 +490,172 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri + } + } + +-/* making this inline rather than a macro prevents a coverity "unreachable" +- * warning on the first usage +- */ +-static inline const char * +-comma_if(int i) ++// Append a flag to resource description string's flags list ++static bool ++add_output_flag(GString *s, const char *flag_desc, bool have_flags) + { +- return i? ", " : ""; ++ g_string_append(s, (have_flags? ", " : " (")); ++ g_string_append(s, flag_desc); ++ return true; + } + +-static char * +-flags_string(pe_resource_t *rsc, pe_node_t *node, long options, +- const char *target_role) ++// Append a node name to resource description string's node list ++static bool ++add_output_node(GString *s, const char *node, bool have_nodes) + { +- char *flags[6] = { NULL, }; +- char *result = NULL; +- int ndx = 0; ++ g_string_append(s, (have_nodes? " " : " [ ")); ++ g_string_append(s, node); ++ return true; ++} ++ ++/*! 
++ * \internal ++ * \brief Create a string description of a resource ++ * ++ * \param[in] rsc Resource to describe ++ * \param[in] name Desired identifier for the resource ++ * \param[in] node If not NULL, node that resource is "on" ++ * \param[in] options Bitmask of pe_print_* ++ * \param[in] target_role Resource's target role ++ * \param[in] show_nodes Whether to display nodes when multiply active ++ * ++ * \return Newly allocated string description of resource ++ * \note Caller must free the result with g_free(). ++ */ ++static gchar * ++native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node, ++ long options, const char *target_role, bool show_nodes) ++{ ++ const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); ++ const char *provider = NULL; ++ const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); ++ char *retval = NULL; ++ GString *outstr = NULL; ++ bool have_flags = false; ++ ++ CRM_CHECK(name != NULL, name = "unknown"); ++ CRM_CHECK(kind != NULL, kind = "unknown"); ++ CRM_CHECK(class != NULL, class = "unknown"); ++ ++ if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { ++ provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); ++ } + +- if (node && node->details->online == FALSE && node->details->unclean) { +- flags[ndx++] = strdup("UNCLEAN"); ++ if (is_set(options, pe_print_rsconly) ++ || pcmk__list_of_multiple(rsc->running_on)) { ++ node = NULL; + } + ++ // We need a string of at least this size ++ outstr = g_string_sized_new(strlen(name) + strlen(class) + strlen(kind) ++ + (provider? (strlen(provider) + 2) : 0) ++ + (node? strlen(node->details->uname) + 1 : 0) ++ + 11); ++ ++ // Resource name and agent ++ g_string_printf(outstr, "%s\t(%s%s%s:%s):\t", name, class, ++ /* @COMPAT This should be a single ':' (see CLBZ#5395) but ++ * to avoid breaking anything relying on it, we're keeping ++ * it like this until the next minor version bump. ++ */ ++ (provider? "::" : ""), (provider? provider : ""), kind); ++ ++ // State on node ++ if (is_set(rsc->flags, pe_rsc_orphan)) { ++ g_string_append(outstr, " ORPHANED"); ++ } ++ if (is_set(rsc->flags, pe_rsc_failed)) { ++ enum rsc_role_e role = native_displayable_role(rsc); ++ ++ if (role > RSC_ROLE_SLAVE) { ++ g_string_append_printf(outstr, " FAILED %s", role2text(role)); ++ } else { ++ g_string_append(outstr, " FAILED"); ++ } ++ } else { ++ g_string_append(outstr, native_displayable_state(rsc, options)); ++ } ++ if (node) { ++ g_string_append_printf(outstr, " %s", node->details->uname); ++ } ++ ++ // Flags, as: ( [...]) ++ if (node && !(node->details->online) && node->details->unclean) { ++ have_flags = add_output_flag(outstr, "UNCLEAN", have_flags); ++ } + if (is_set(options, pe_print_pending)) { + const char *pending_task = native_pending_task(rsc); + + if (pending_task) { +- flags[ndx++] = strdup(pending_task); ++ have_flags = add_output_flag(outstr, pending_task, have_flags); + } + } +- + if (target_role) { + enum rsc_role_e target_role_e = text2role(target_role); + +- /* Ignore target role Started, as it is the default anyways +- * (and would also allow a Master to be Master). +- * Show if target role limits our abilities. */ ++ /* Only show target role if it limits our abilities (i.e. ignore ++ * Started, as it is the default anyways, and doesn't prevent the ++ * resource from becoming Master). 
++ */ + if (target_role_e == RSC_ROLE_STOPPED) { +- flags[ndx++] = strdup("disabled"); ++ have_flags = add_output_flag(outstr, "disabled", have_flags); + + } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable) + && target_role_e == RSC_ROLE_SLAVE) { +- flags[ndx++] = crm_strdup_printf("target-role:%s", target_role); ++ have_flags = add_output_flag(outstr, "target-role:", have_flags); ++ g_string_append(outstr, target_role); + } + } +- + if (is_set(rsc->flags, pe_rsc_block)) { +- flags[ndx++] = strdup("blocked"); +- ++ have_flags = add_output_flag(outstr, "blocked", have_flags); + } else if (is_not_set(rsc->flags, pe_rsc_managed)) { +- flags[ndx++] = strdup("unmanaged"); ++ have_flags = add_output_flag(outstr, "unmanaged", have_flags); + } +- + if (is_set(rsc->flags, pe_rsc_failure_ignored)) { +- flags[ndx++] = strdup("failure ignored"); ++ have_flags = add_output_flag(outstr, "failure ignored", have_flags); + } +- +- if (ndx > 0) { +- char *total = g_strjoinv(" ", flags); +- +- result = crm_strdup_printf(" (%s)", total); +- g_free(total); +- } +- +- while (--ndx >= 0) { +- free(flags[ndx]); +- } +- return result; +-} +- +-static char * +-native_output_string(resource_t *rsc, const char *name, node_t *node, long options, +- const char *target_role) { +- const char *desc = NULL; +- const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); +- const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); +- enum rsc_role_e role = native_displayable_role(rsc); +- +- char *retval = NULL; +- +- char *unames = NULL; +- char *provider = NULL; +- const char *orphan = NULL; +- char *role_s = NULL; +- char *node_s = NULL; +- char *print_dev_s = NULL; +- char *flags_s = NULL; +- +- CRM_ASSERT(kind != NULL); +- +- if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { +- provider = crm_strdup_printf("::%s", crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER)); ++ if (is_set(options, pe_print_dev)) { ++ if (is_set(options, pe_rsc_provisional)) { ++ have_flags = add_output_flag(outstr, "provisional", have_flags); ++ } ++ if (is_not_set(options, pe_rsc_runnable)) { ++ have_flags = add_output_flag(outstr, "non-startable", have_flags); ++ } ++ have_flags = add_output_flag(outstr, "variant:", have_flags); ++ g_string_append_printf(outstr, "%s priority:%f", ++ crm_element_name(rsc->xml), ++ (double) (rsc->priority)); + } +- +- if (is_set(rsc->flags, pe_rsc_orphan)) { +- orphan = " ORPHANED"; ++ if (have_flags) { ++ g_string_append(outstr, ")"); + } + +- if (role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) { +- role_s = crm_strdup_printf(" FAILED %s", role2text(role)); +- } else if (is_set(rsc->flags, pe_rsc_failed)) { +- role_s = crm_strdup_printf(" FAILED"); +- } else { +- role_s = crm_strdup_printf(" %s", native_displayable_state(rsc, options)); +- } ++ // User-supplied description ++ if (is_set(options, pe_print_rsconly) ++ || pcmk__list_of_multiple(rsc->running_on)) { ++ const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC); + +- if (node) { +- node_s = crm_strdup_printf(" %s", node->details->uname); ++ if (desc) { ++ g_string_append_printf(outstr, " %s", desc); ++ } + } + +- if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { +- desc = crm_element_value(rsc->xml, XML_ATTR_DESC); +- } ++ if (show_nodes && is_not_set(options, pe_print_rsconly) ++ && pcmk__list_of_multiple(rsc->running_on)) { ++ bool have_nodes = false; + +- if (is_not_set(options, pe_print_rsconly) && g_list_length(rsc->running_on) > 1) { +- GListPtr gIter = 
rsc->running_on; +- gchar **arr = calloc(g_list_length(rsc->running_on)+1, sizeof(gchar *)); +- int i = 0; +- char *total = NULL; ++ for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { ++ pe_node_t *n = (pe_node_t *) iter->data; + +- for (; gIter != NULL; gIter = gIter->next) { +- node_t *n = (node_t *) gIter->data; +- arr[i] = (gchar *) strdup(n->details->uname); +- i++; ++ have_nodes = add_output_node(outstr, n->details->uname, have_nodes); ++ } ++ if (have_nodes) { ++ g_string_append(outstr, " ]"); + } +- +- total = g_strjoinv(" ", arr); +- unames = crm_strdup_printf(" [ %s ]", total); +- +- g_free(total); +- g_strfreev(arr); + } + +- if (is_set(options, pe_print_dev)) { +- print_dev_s = crm_strdup_printf(" (%s%svariant=%s, priority=%f)", +- is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "", +- is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ", +- crm_element_name(rsc->xml), (double)rsc->priority); +- } +- +- flags_s = flags_string(rsc, node, options, target_role); +- +- retval = crm_strdup_printf("%s\t(%s%s:%s):\t%s%s%s%s%s%s%s%s", +- name, class, +- provider ? provider : "", +- kind, +- orphan ? orphan : "", +- role_s, +- node_s ? node_s : "", +- print_dev_s ? print_dev_s : "", +- flags_s ? flags_s : "", +- desc ? " " : "", desc ? desc : "", +- unames ? unames : ""); +- +- free(provider); +- free(role_s); +- free(node_s); +- free(unames); +- free(print_dev_s); +- free(flags_s); +- ++ retval = outstr->str; ++ g_string_free(outstr, FALSE); + return retval; + } + +@@ -656,7 +663,6 @@ void + pe__common_output_html(pcmk__output_t *out, resource_t * rsc, + const char *name, node_t *node, long options) + { +- char *s = NULL; + const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); + const char *target_role = NULL; + +@@ -675,10 +681,6 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc, + target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); + } + +- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { +- node = NULL; +- } +- + if (is_not_set(rsc->flags, pe_rsc_managed)) { + cl = "rsc-managed"; + +@@ -698,10 +700,14 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc, + cl = "rsc-ok"; + } + +- s = native_output_string(rsc, name, node, options, target_role); +- list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL); +- pcmk_create_html_node(list_node, "span", NULL, cl, s); +- free(s); ++ { ++ gchar *s = native_output_string(rsc, name, node, options, target_role, ++ true); ++ ++ list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL); ++ pcmk_create_html_node(list_node, "span", NULL, cl, s); ++ g_free(s); ++ } + + if (is_set(options, pe_print_details)) { + GHashTableIter iter; +@@ -744,7 +750,6 @@ void + pe__common_output_text(pcmk__output_t *out, resource_t * rsc, + const char *name, node_t *node, long options) + { +- char *s = NULL; + const char *target_role = NULL; + + CRM_ASSERT(rsc->variant == pe_native); +@@ -758,13 +763,13 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc, + target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); + } + +- if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { +- node = NULL; +- } ++ { ++ gchar *s = native_output_string(rsc, name, node, options, target_role, ++ true); + +- s = native_output_string(rsc, name, node, options, target_role); +- out->list_item(out, NULL, "%s", s); +- free(s); ++ out->list_item(out, NULL, "%s", s); ++ g_free(s); ++ } + + if 
(is_set(options, pe_print_details)) { + GHashTableIter iter; +@@ -806,22 +811,14 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc, + void + common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data) + { +- const char *desc = NULL; +- const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); +- const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); + const char *target_role = NULL; +- enum rsc_role_e role = native_displayable_role(rsc); +- +- int offset = 0; +- int flagOffset = 0; +- char buffer[LINE_MAX]; +- char flagBuffer[LINE_MAX]; + + CRM_ASSERT(rsc->variant == pe_native); +- CRM_ASSERT(kind != NULL); + + if (rsc->meta) { +- const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC); ++ const char *is_internal = g_hash_table_lookup(rsc->meta, ++ XML_RSC_ATTR_INTERNAL_RSC); ++ + if (crm_is_true(is_internal) && is_not_set(options, pe_print_implicit)) { + crm_trace("skipping print of internal resource %s", rsc->id); + return; +@@ -829,17 +826,13 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n + target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); + } + +- if (pre_text == NULL && (options & pe_print_printf)) { +- pre_text = " "; +- } +- + if (options & pe_print_xml) { + native_print_xml(rsc, pre_text, options, print_data); + return; + } + +- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { +- node = NULL; ++ if ((pre_text == NULL) && (options & pe_print_printf)) { ++ pre_text = " "; + } + + if (options & pe_print_html) { +@@ -849,10 +842,10 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n + } else if (is_set(rsc->flags, pe_rsc_failed)) { + status_print(""); + +- } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) { ++ } else if (rsc->running_on == NULL) { + status_print(""); + +- } else if (g_list_length(rsc->running_on) > 1) { ++ } else if (pcmk__list_of_multiple(rsc->running_on)) { + status_print(""); + + } else if (is_set(rsc->flags, pe_rsc_failure_ignored)) { +@@ -863,106 +856,29 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n + } + } + +- if(pre_text) { +- offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text); +- } +- offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", name); +- offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); +- if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { +- const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); +- offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); +- } +- offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind); +- if(is_set(rsc->flags, pe_rsc_orphan)) { +- offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED "); +- } +- if(role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) { +- offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED %s", role2text(role)); +- } else if(is_set(rsc->flags, pe_rsc_failed)) { +- offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED"); +- } else { +- const char *rsc_state = native_displayable_state(rsc, options); +- +- offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_state); +- } +- +- if(node) { +- offset += snprintf(buffer + offset, LINE_MAX - offset, " %s", node->details->uname); +- +- if (node->details->online == FALSE && node->details->unclean) { +- flagOffset += 
snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%sUNCLEAN", comma_if(flagOffset)); +- } +- } +- +- if (options & pe_print_pending) { +- const char *pending_task = native_pending_task(rsc); +- +- if (pending_task) { +- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%s%s", comma_if(flagOffset), pending_task); +- } +- } +- +- if (target_role) { +- enum rsc_role_e target_role_e = text2role(target_role); +- +- /* Ignore target role Started, as it is the default anyways +- * (and would also allow a Master to be Master). +- * Show if target role limits our abilities. */ +- if (target_role_e == RSC_ROLE_STOPPED) { +- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%sdisabled", comma_if(flagOffset)); +- +- } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable) +- && target_role_e == RSC_ROLE_SLAVE) { +- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%starget-role:%s", comma_if(flagOffset), target_role); +- } +- } +- +- if (is_set(rsc->flags, pe_rsc_block)) { +- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%sblocked", comma_if(flagOffset)); +- +- } else if (is_not_set(rsc->flags, pe_rsc_managed)) { +- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%sunmanaged", comma_if(flagOffset)); +- } +- +- if(is_set(rsc->flags, pe_rsc_failure_ignored)) { +- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, +- "%sfailure ignored", comma_if(flagOffset)); +- } +- +- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { +- desc = crm_element_value(rsc->xml, XML_ATTR_DESC); +- } +- +- CRM_LOG_ASSERT(offset > 0); +- if(flagOffset > 0) { +- status_print("%s (%s)%s%s", buffer, flagBuffer, desc?" ":"", desc?desc:""); +- } else { +- status_print("%s%s%s", buffer, desc?" ":"", desc?desc:""); ++ { ++ gchar *resource_s = native_output_string(rsc, name, node, options, ++ target_role, false); ++ status_print("%s%s", (pre_text? pre_text : ""), resource_s); ++ g_free(resource_s); + } + + #if CURSES_ENABLED +- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { +- /* Done */ +- +- } else if (options & pe_print_ncurses) { ++ if (is_set(options, pe_print_ncurses) ++ && is_not_set(options, pe_print_rsconly) ++ && !pcmk__list_of_multiple(rsc->running_on)) { + /* coverity[negative_returns] False positive */ + move(-1, 0); + } + #endif + +- if (options & pe_print_html) { ++ if (is_set(options, pe_print_html)) { + status_print(" "); + } + +- if ((options & pe_print_rsconly)) { ++ if (is_not_set(options, pe_print_rsconly) ++ && pcmk__list_of_multiple(rsc->running_on)) { + +- } else if (g_list_length(rsc->running_on) > 1) { + GListPtr gIter = rsc->running_on; + int counter = 0; + +@@ -1025,10 +941,6 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n + GHashTableIter iter; + node_t *n = NULL; + +- status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text, +- is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "", +- is_set(rsc->flags, pe_rsc_runnable) ? 
"" : "non-startable, ", +- crm_element_name(rsc->xml), (double)rsc->priority); + status_print("%s\tAllowed Nodes", pre_text); + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&n)) { +-- +1.8.3.1 + + +From 41e911be8ea9151b3f0758c2c22c0e69b8b78d93 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 19 Dec 2019 17:18:41 -0600 +Subject: [PATCH 2/8] Log: scheduler: drop redundant trace messages + +We logged "applying placement constraints" three times. +--- + lib/pacemaker/pcmk_sched_allocate.c | 17 ++++------------- + 1 file changed, 4 insertions(+), 13 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c +index ca43c71..dde8b69 100644 +--- a/lib/pacemaker/pcmk_sched_allocate.c ++++ b/lib/pacemaker/pcmk_sched_allocate.c +@@ -623,21 +623,15 @@ check_actions(pe_working_set_t * data_set) + } + } + +-static gboolean ++static void + apply_placement_constraints(pe_working_set_t * data_set) + { +- GListPtr gIter = NULL; +- +- crm_trace("Applying constraints..."); +- +- for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { ++ for (GList *gIter = data_set->placement_constraints; ++ gIter != NULL; gIter = gIter->next) { + pe__location_t *cons = gIter->data; + + cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); + } +- +- return TRUE; +- + } + + static gboolean +@@ -994,10 +988,7 @@ stage2(pe_working_set_t * data_set) + { + GListPtr gIter = NULL; + +- crm_trace("Applying placement constraints"); +- +- gIter = data_set->nodes; +- for (; gIter != NULL; gIter = gIter->next) { ++ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { + node_t *node = (node_t *) gIter->data; + + if (node == NULL) { +-- +1.8.3.1 + + +From 7fe136e19b5018d609beb8bad4e34234739572c9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Sat, 7 Dec 2019 12:13:11 -0600 +Subject: [PATCH 3/8] Refactor: libcrmcommon: convenience functions for list + length comparisons + +... for efficiency and readability +--- + include/crm/common/internal.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index da2c7d7..484c836 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -126,6 +126,20 @@ crm_getpid_s() + return crm_strdup_printf("%lu", (unsigned long) getpid()); + } + ++// More efficient than g_list_length(list) == 1 ++static inline bool ++pcmk__list_of_1(GList *list) ++{ ++ return list && (list->next == NULL); ++} ++ ++// More efficient than g_list_length(list) > 1 ++static inline bool ++pcmk__list_of_multiple(GList *list) ++{ ++ return list && (list->next != NULL); ++} ++ + /* convenience functions for failure-related node attributes */ + + #define CRM_FAIL_COUNT_PREFIX "fail-count" +-- +1.8.3.1 + + +From 9ff4f6bca540576f0a3333c959e8014ed168353f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 16 Dec 2019 14:13:30 -0600 +Subject: [PATCH 4/8] Refactor: libcrmcommon: add convenience macros for + plurals + +I've avoided making s_if_plural() an official API due to its hackiness, but +it really is the best solution for now. Promote it to pcmk__plural_s(), along +with a companion macro pcmk__plural_alt() for more complicated plurals. 
+--- + include/crm/common/internal.h | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index 484c836..ee560c9 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -107,6 +107,29 @@ bool crm_compress_string(const char *data, int length, int max, char **result, + unsigned int *result_len); + gint crm_alpha_sort(gconstpointer a, gconstpointer b); + ++/* Correctly displaying singular or plural is complicated; consider "1 node has" ++ * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g. ++ * ++ * if (a == 1) { ++ * crm_info("singular message"): ++ * } else { ++ * crm_info("plural message"); ++ * } ++ * ++ * though even that's not sufficient for all languages besides English (if we ++ * ever desire to do translations of output and log messages). But the following ++ * convenience macros are "good enough" and more concise for many cases. ++ */ ++ ++/* Example: ++ * crm_info("Found %d %s", nentries, ++ * pcmk__plural_alt(nentries, "entry", "entries")); ++ */ ++#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2)) ++ ++// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes)); ++#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s") ++ + static inline char * + crm_concat(const char *prefix, const char *suffix, char join) + { +-- +1.8.3.1 + + +From 0378db5030400202e59b2bae0dabd65d00a3e9c8 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 12 Dec 2019 20:50:50 -0600 +Subject: [PATCH 5/8] Log: controller: improve join messages + +--- + daemons/controld/controld_fsa.c | 81 ++++---- + daemons/controld/controld_join_dc.c | 383 +++++++++++++++++++++--------------- + 2 files changed, 268 insertions(+), 196 deletions(-) + +diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c +index 6760224..b985fa9 100644 +--- a/daemons/controld/controld_fsa.c ++++ b/daemons/controld/controld_fsa.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -460,12 +460,53 @@ log_fsa_input(fsa_data_t * stored_msg) + } + } + ++static void ++check_join_counts(fsa_data_t *msg_data) ++{ ++ int count; ++ guint npeers; ++ ++ count = crmd_join_phase_count(crm_join_finalized); ++ if (count > 0) { ++ crm_err("%d cluster node%s failed to confirm join", ++ count, pcmk__plural_s(count)); ++ crmd_join_phase_log(LOG_NOTICE); ++ return; ++ } ++ ++ npeers = crm_active_peers(); ++ count = crmd_join_phase_count(crm_join_confirmed); ++ if (count == npeers) { ++ if (npeers == 1) { ++ crm_debug("Sole active cluster node is fully joined"); ++ } else { ++ crm_debug("All %d active cluster nodes are fully joined", count); ++ } ++ ++ } else if (count > npeers) { ++ crm_err("New election needed because more nodes confirmed join " ++ "than are in membership (%d > %u)", count, npeers); ++ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); ++ ++ } else if (saved_ccm_membership_id != crm_peer_seq) { ++ crm_info("New join needed because membership changed (%llu -> %llu)", ++ saved_ccm_membership_id, crm_peer_seq); ++ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); ++ ++ } else { ++ crm_warn("Only %d of %u active cluster nodes fully joined " ++ "(%d did not respond to offer)", ++ count, npeers, crmd_join_phase_count(crm_join_welcomed)); ++ } ++} ++ + long long + do_state_transition(long long actions, + enum crmd_fsa_state cur_state, + enum crmd_fsa_state next_state, fsa_data_t * msg_data) + { + int level = LOG_INFO; ++ int count = 0; + long long tmp = actions; + gboolean clear_recovery_bit = TRUE; + +@@ -563,13 +604,14 @@ do_state_transition(long long actions, + crm_warn("Progressed to state %s after %s", + fsa_state2string(next_state), fsa_cause2string(cause)); + } +- if (crmd_join_phase_count(crm_join_welcomed) > 0) { +- crm_warn("%u cluster nodes failed to respond" +- " to the join offer.", crmd_join_phase_count(crm_join_welcomed)); ++ count = crmd_join_phase_count(crm_join_welcomed); ++ if (count > 0) { ++ crm_warn("%d cluster node%s failed to respond to join offer", ++ count, pcmk__plural_s(count)); + crmd_join_phase_log(LOG_NOTICE); + + } else { +- crm_debug("All %d cluster nodes responded to the join offer.", ++ crm_debug("All cluster nodes (%d) responded to join offer", + crmd_join_phase_count(crm_join_integrated)); + } + break; +@@ -581,34 +623,7 @@ do_state_transition(long long actions, + crm_info("Progressed to state %s after %s", + fsa_state2string(next_state), fsa_cause2string(cause)); + } +- +- if (crmd_join_phase_count(crm_join_finalized) > 0) { +- crm_err("%u cluster nodes failed to confirm their join.", +- crmd_join_phase_count(crm_join_finalized)); +- crmd_join_phase_log(LOG_NOTICE); +- +- } else if (crmd_join_phase_count(crm_join_confirmed) +- == crm_active_peers()) { +- crm_debug("All %u cluster nodes are" +- " eligible to run resources.", crm_active_peers()); +- +- } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) { +- crm_err("We have more confirmed nodes than our membership does: %d vs. 
%d", +- crmd_join_phase_count(crm_join_confirmed), crm_active_peers()); +- register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); +- +- } else if (saved_ccm_membership_id != crm_peer_seq) { +- crm_info("Membership changed: %llu -> %llu - join restart", +- saved_ccm_membership_id, crm_peer_seq); +- register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); +- +- } else { +- crm_warn("Only %u of %u cluster " +- "nodes are eligible to run resources - continue %d", +- crmd_join_phase_count(crm_join_confirmed), +- crm_active_peers(), crmd_join_phase_count(crm_join_welcomed)); +- } +-/* initialize_join(FALSE); */ ++ check_join_counts(msg_data); + break; + + case S_STOPPING: +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 988aaa6..54324b2 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -26,7 +26,11 @@ void finalize_join_for(gpointer key, gpointer value, gpointer user_data); + void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); + gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); + ++/* Numeric counter used to identify join rounds (an unsigned int would be ++ * appropriate, except we get and set it in XML as int) ++ */ + static int current_join_id = 0; ++ + unsigned long long saved_ccm_membership_id = 0; + + void +@@ -34,12 +38,7 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase + { + enum crm_join_phase last = 0; + +- if(node == NULL) { +- crm_err("Could not update join because node not specified" +- CRM_XS " join-%u source=%s phase=%s", +- current_join_id, source, crm_join_phase_str(phase)); +- return; +- } ++ CRM_CHECK(node != NULL, return); + + /* Remote nodes do not participate in joins */ + if (is_set(node->flags, crm_remote_node)) { +@@ -49,21 +48,23 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase + last = node->join; + + if(phase == last) { +- crm_trace("%s: Node %s[%u] - join-%u phase still %s", +- source, node->uname, node->id, current_join_id, +- crm_join_phase_str(last)); ++ crm_trace("Node %s join-%d phase is still %s " ++ CRM_XS " nodeid=%u source=%s", ++ node->uname, current_join_id, crm_join_phase_str(last), ++ node->id, source); + + } else if ((phase <= crm_join_none) || (phase == (last + 1))) { + node->join = phase; +- crm_info("%s: Node %s[%u] - join-%u phase %s -> %s", +- source, node->uname, node->id, current_join_id, +- crm_join_phase_str(last), crm_join_phase_str(phase)); ++ crm_trace("Node %s join-%d phase is now %s (was %s) " ++ CRM_XS " nodeid=%u source=%s", ++ node->uname, current_join_id, crm_join_phase_str(phase), ++ crm_join_phase_str(last), node->id, source); + + } else { +- crm_err("Could not update join for node %s because phase transition invalid " +- CRM_XS " join-%u source=%s node_id=%u last=%s new=%s", +- node->uname, current_join_id, source, node->id, +- crm_join_phase_str(last), crm_join_phase_str(phase)); ++ crm_warn("Rejecting join-%d phase update for node %s because " ++ "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", ++ current_join_id, node->uname, crm_join_phase_str(last), ++ crm_join_phase_str(phase), node->id, source); + } + } + +@@ -73,9 +74,7 @@ initialize_join(gboolean before) + GHashTableIter iter; + crm_node_t *peer = NULL; + +- /* clear out/reset a bunch of stuff */ +- crm_debug("join-%d: Initializing join data (flag=%s)", +- current_join_id, before ? 
"true" : "false"); ++ crm_debug("Starting new join round join-%d", current_join_id); + + g_hash_table_iter_init(&iter, crm_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { +@@ -128,7 +127,9 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) + + CRM_ASSERT(member != NULL); + if (crm_is_peer_active(member) == FALSE) { +- crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state); ++ crm_info("Not making join-%d offer to inactive node %s", ++ current_join_id, ++ (member->uname? member->uname : "with unknown name")); + if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) { + /* You would think this unsafe, but in fact this plus an + * active resource is what causes it to be fenced. +@@ -145,17 +146,21 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) + } + + if (member->uname == NULL) { +- crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid); ++ crm_info("Not making join-%d offer to node uuid %s with unknown name", ++ current_join_id, member->uuid); + return; + } + + if (saved_ccm_membership_id != crm_peer_seq) { + saved_ccm_membership_id = crm_peer_seq; +- crm_info("Making join offers based on membership %llu", crm_peer_seq); ++ crm_info("Making join-%d offers based on membership event %llu", ++ current_join_id, crm_peer_seq); + } + + if(user_data && member->join > crm_join_none) { +- crm_info("Skipping %s: already known %d", member->uname, member->join); ++ crm_info("Not making join-%d offer to already known node %s (%s)", ++ current_join_id, member->uname, ++ crm_join_phase_str(member->join)); + return; + } + +@@ -166,14 +171,11 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) + // Advertise our feature set so the joining node can bail if not compatible + crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + +- /* send the welcome */ +- crm_info("join-%d: Sending offer to %s", current_join_id, member->uname); +- ++ crm_info("Sending join-%d offer to %s", current_join_id, member->uname); + send_cluster_message(member, crm_msg_crmd, offer, TRUE); + free_xml(offer); + + crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed); +- /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */ + } + + /* A_DC_JOIN_OFFER_ALL */ +@@ -183,6 +185,8 @@ do_dc_join_offer_all(long long action, + enum crmd_fsa_state cur_state, + enum crmd_fsa_input current_input, fsa_data_t * msg_data) + { ++ int count; ++ + /* Reset everyone's status back to down or in_ccm in the CIB. + * Any nodes that are active in the CIB but not in the cluster membership + * will be seen as offline by the scheduler anyway. 
+@@ -197,9 +201,11 @@ do_dc_join_offer_all(long long action, + } + g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); + ++ count = crmd_join_phase_count(crm_join_welcomed); ++ crm_info("Waiting on join-%d requests from %d outstanding node%s", ++ current_join_id, count, pcmk__plural_s(count)); ++ + // Don't waste time by invoking the scheduler yet +- crm_info("join-%d: Waiting on %d outstanding join acks", +- current_join_id, crmd_join_phase_count(crm_join_welcomed)); + } + + /* A_DC_JOIN_OFFER_ONE */ +@@ -211,50 +217,40 @@ do_dc_join_offer_one(long long action, + { + crm_node_t *member; + ha_msg_input_t *welcome = NULL; +- +- const char *op = NULL; ++ int count; + const char *join_to = NULL; + +- if (msg_data->data) { +- welcome = fsa_typed_data(fsa_dt_ha_msg); +- +- } else { +- crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes"); ++ if (msg_data->data == NULL) { ++ crm_info("Making join-%d offers to any unconfirmed nodes " ++ "because an unknown node joined", current_join_id); + g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); + check_join_state(cur_state, __FUNCTION__); + return; + } + ++ welcome = fsa_typed_data(fsa_dt_ha_msg); + if (welcome == NULL) { +- crm_err("Attempt to send welcome message without a message to reply to!"); ++ // fsa_typed_data() already logged an error + return; + } + + join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); + if (join_to == NULL) { +- crm_err("Attempt to send welcome message without a host to reply to!"); ++ crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- + member = crm_get_peer(0, join_to); +- op = crm_element_value(welcome->msg, F_CRM_TASK); +- if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { +- /* note: it _is_ possible that a node will have been +- * sick or starting up when the original offer was made. +- * however, it will either re-announce itself in due course +- * _or_ we can re-store the original offer on the client. +- */ +- crm_trace("(Re-)offering membership to %s...", join_to); +- } + +- crm_info("join-%d: Processing %s request from %s in state %s", +- current_join_id, op, join_to, fsa_state2string(cur_state)); ++ /* It is possible that a node will have been sick or starting up when the ++ * original offer was made. However, it will either re-announce itself in ++ * due course, or we can re-store the original offer on the client. ++ */ + + crm_update_peer_join(__FUNCTION__, member, crm_join_none); + join_make_offer(NULL, member, NULL); + +- /* always offer to the DC (ourselves) +- * this ensures the correct value for max_generation_from ++ /* If the offer isn't to the local node, make an offer to the local node as ++ * well, to ensure the correct value for max_generation_from. 
+ */ + if (strcmp(join_to, fsa_our_uname) != 0) { + member = crm_get_peer(0, fsa_our_uname); +@@ -266,9 +262,11 @@ do_dc_join_offer_one(long long action, + */ + abort_transition(INFINITY, tg_restart, "Node join", NULL); + ++ count = crmd_join_phase_count(crm_join_welcomed); ++ crm_info("Waiting on join-%d requests from %d outstanding node%s", ++ current_join_id, count, pcmk__plural_s(count)); ++ + // Don't waste time by invoking the scheduler yet +- crm_debug("Waiting on %d outstanding join acks for join-%d", +- crmd_join_phase_count(crm_join_welcomed), current_join_id); + } + + static int +@@ -301,22 +299,31 @@ do_dc_join_filter_offer(long long action, + + int cmp = 0; + int join_id = -1; ++ int count = 0; + gboolean ack_nack_bool = TRUE; +- const char *ack_nack = CRMD_JOINSTATE_MEMBER; + ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); + + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); + const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); + const char *join_version = crm_element_value(join_ack->msg, + XML_ATTR_CRM_VERSION); ++ crm_node_t *join_node = NULL; + +- crm_node_t *join_node = crm_get_peer(0, join_from); +- +- crm_debug("Processing req from %s", join_from); ++ if (join_from == NULL) { ++ crm_err("Ignoring invalid join request without node name"); ++ return; ++ } ++ join_node = crm_get_peer(0, join_from); + +- generation = join_ack->xml; + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); ++ if (join_id != current_join_id) { ++ crm_debug("Ignoring join-%d request from %s because we are on join-%d", ++ join_id, join_from, current_join_id); ++ check_join_state(cur_state, __FUNCTION__); ++ return; ++ } + ++ generation = join_ack->xml; + if (max_generation_xml != NULL && generation != NULL) { + int lpc = 0; + +@@ -331,68 +338,71 @@ do_dc_join_filter_offer(long long action, + } + } + +- if (join_id != current_join_id) { +- crm_debug("Invalid response from %s: join-%d vs. join-%d", +- join_from, join_id, current_join_id); +- check_join_state(cur_state, __FUNCTION__); +- return; ++ if (ref == NULL) { ++ ref = "none"; // for logging only ++ } + +- } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) { +- crm_err("Node %s is not a member", join_from); ++ if (crm_is_peer_active(join_node) == FALSE) { ++ crm_err("Rejecting join-%d request from inactive node %s " ++ CRM_XS " ref=%s", join_id, join_from, ref); + ack_nack_bool = FALSE; + + } else if (generation == NULL) { +- crm_err("Generation was NULL"); ++ crm_err("Rejecting invalid join-%d request from node %s " ++ "missing CIB generation " CRM_XS " ref=%s", ++ join_id, join_from, ref); + ack_nack_bool = FALSE; + + } else if ((join_version == NULL) + || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { +- crm_err("Node %s feature set (%s) is incompatible with ours (%s)", +- join_from, (join_version? join_version : "pre-3.1.0"), +- CRM_FEATURE_SET); ++ crm_err("Rejecting join-%d request from node %s because feature set %s" ++ " is incompatible with ours (%s) " CRM_XS " ref=%s", ++ join_id, join_from, (join_version? 
join_version : "pre-3.1.0"), ++ CRM_FEATURE_SET, ref); + ack_nack_bool = FALSE; + + } else if (max_generation_xml == NULL) { ++ crm_debug("Accepting join-%d request from %s " ++ "(with first CIB generation) " CRM_XS " ref=%s", ++ join_id, join_from, ref); + max_generation_xml = copy_xml(generation); + max_generation_from = strdup(join_from); + + } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { +- crm_debug("%s has a better generation number than" +- " the current max %s", join_from, max_generation_from); +- if (max_generation_xml) { +- crm_log_xml_debug(max_generation_xml, "Max generation"); +- } +- crm_log_xml_debug(generation, "Their generation"); ++ crm_debug("Accepting join-%d request from %s (with better " ++ "CIB generation than current best from %s) " CRM_XS " ref=%s", ++ join_id, join_from, max_generation_from, ref); ++ crm_log_xml_debug(max_generation_xml, "Old max generation"); ++ crm_log_xml_debug(generation, "New max generation"); + + free(max_generation_from); + free_xml(max_generation_xml); + + max_generation_from = strdup(join_from); + max_generation_xml = copy_xml(join_ack->xml); ++ ++ } else { ++ crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", ++ join_id, join_from, ref); + } + + if (ack_nack_bool == FALSE) { +- /* NACK this client */ +- ack_nack = CRMD_JOINSTATE_NACK; + crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack); +- crm_err("Rejecting cluster join request from %s " CRM_XS +- " NACK join-%d ref=%s", join_from, join_id, ref); +- ++ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK); + } else { +- crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); + crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated); ++ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); + } + +- crm_update_peer_expected(__FUNCTION__, join_node, ack_nack); +- +- crm_debug("%u nodes have been integrated into join-%d", +- crmd_join_phase_count(crm_join_integrated), join_id); +- ++ count = crmd_join_phase_count(crm_join_integrated); ++ crm_debug("%d node%s currently integrated in join-%d", ++ count, pcmk__plural_s(count), join_id); + + if (check_join_state(cur_state, __FUNCTION__) == FALSE) { + // Don't waste time by invoking the scheduler yet +- crm_debug("join-%d: Still waiting on %d outstanding offers", +- join_id, crmd_join_phase_count(crm_join_welcomed)); ++ count = crmd_join_phase_count(crm_join_welcomed); ++ crm_debug("Waiting on join-%d requests from %d outstanding node%s", ++ join_id, count, pcmk__plural_s(count)); + } + } + +@@ -405,21 +415,24 @@ do_dc_join_finalize(long long action, + { + char *sync_from = NULL; + int rc = pcmk_ok; ++ int count_welcomed = crmd_join_phase_count(crm_join_welcomed); ++ int count_integrated = crmd_join_phase_count(crm_join_integrated); + + /* This we can do straight away and avoid clients timing us out + * while we compute the latest CIB + */ +- crm_debug("Finalizing join-%d for %d clients", +- current_join_id, crmd_join_phase_count(crm_join_integrated)); +- +- crmd_join_phase_log(LOG_INFO); +- if (crmd_join_phase_count(crm_join_welcomed) != 0) { +- crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed)); ++ if (count_welcomed != 0) { ++ crm_debug("Waiting on join-%d requests from %d outstanding node%s " ++ "before finalizing join", current_join_id, count_welcomed, ++ pcmk__plural_s(count_welcomed)); ++ crmd_join_phase_log(LOG_DEBUG); + /* crmd_fsa_stall(FALSE); Needed? 
*/ + return; + +- } else if (crmd_join_phase_count(crm_join_integrated) == 0) { +- /* Nothing to do */ ++ } else if (count_integrated == 0) { ++ crm_debug("Finalization not needed for join-%d at the current time", ++ current_join_id); ++ crmd_join_phase_log(LOG_DEBUG); + check_join_state(fsa_state, __FUNCTION__); + return; + } +@@ -430,8 +443,9 @@ do_dc_join_finalize(long long action, + } + + if (is_set(fsa_input_register, R_IN_TRANSITION)) { +- crm_warn("Delaying response to cluster join offer while transition in progress " +- CRM_XS " join-%d", current_join_id); ++ crm_warn("Delaying join-%d finalization while transition in progress", ++ current_join_id); ++ crmd_join_phase_log(LOG_DEBUG); + crmd_fsa_stall(FALSE); + return; + } +@@ -440,18 +454,20 @@ do_dc_join_finalize(long long action, + /* ask for the agreed best CIB */ + sync_from = strdup(max_generation_from); + set_bit(fsa_input_register, R_CIB_ASKED); +- crm_notice("Syncing the Cluster Information Base from %s to rest of cluster " +- CRM_XS " join-%d", sync_from, current_join_id); +- crm_log_xml_notice(max_generation_xml, "Requested version"); ++ crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", ++ current_join_id, count_integrated, ++ pcmk__plural_s(count_integrated), sync_from); ++ crm_log_xml_notice(max_generation_xml, "Requested CIB version"); + + } else { + /* Send _our_ CIB out to everyone */ + sync_from = strdup(fsa_our_uname); +- crm_info("join-%d: Syncing our CIB to the rest of the cluster", +- current_join_id); +- crm_log_xml_debug(max_generation_xml, "Requested version"); ++ crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", ++ current_join_id, count_integrated, ++ pcmk__plural_s(count_integrated)); ++ crm_log_xml_debug(max_generation_xml, "Requested CIB version"); + } +- ++ crmd_join_phase_log(LOG_DEBUG); + + rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); + fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback); +@@ -463,26 +479,33 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi + CRM_LOG_ASSERT(-EPERM != rc); + clear_bit(fsa_input_register, R_CIB_ASKED); + if (rc != pcmk_ok) { +- do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR), +- "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc)); ++ do_crm_log(((rc == -pcmk_err_old_data)? 
LOG_WARNING : LOG_ERR), ++ "Could not sync CIB from %s in join-%d: %s", ++ (char *) user_data, current_join_id, pcmk_strerror(rc)); + + /* restart the whole join process */ + register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); + +- } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) { ++ } else if (!AM_I_DC) { ++ crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); ++ ++ } else if (fsa_state != S_FINALIZE_JOIN) { ++ crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)", ++ current_join_id, fsa_state2string(fsa_state)); ++ ++ } else { + set_bit(fsa_input_register, R_HAVE_CIB); + clear_bit(fsa_input_register, R_CIB_ASKED); + + /* make sure dc_uuid is re-set to us */ + if (check_join_state(fsa_state, __FUNCTION__) == FALSE) { +- crm_debug("Notifying %d clients of join-%d results", +- crmd_join_phase_count(crm_join_integrated), current_join_id); ++ int count_integrated = crmd_join_phase_count(crm_join_integrated); ++ ++ crm_debug("Notifying %d node%s of join-%d results", ++ count_integrated, pcmk__plural_s(count_integrated), ++ current_join_id); + g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); + } +- +- } else { +- crm_debug("No longer the DC in S_FINALIZE_JOIN: %s in %s", +- AM_I_DC ? "DC" : "controller", fsa_state2string(fsa_state)); + } + } + +@@ -492,11 +515,14 @@ join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * outp + fsa_data_t *msg_data = NULL; + + if (rc == pcmk_ok) { +- crm_debug("Join update %d complete", call_id); ++ crm_debug("join-%d node history update (via CIB call %d) complete", ++ current_join_id, call_id); + check_join_state(fsa_state, __FUNCTION__); + + } else { +- crm_err("Join update %d failed", call_id); ++ crm_err("join-%d node history update (via CIB call %d) failed: %s " ++ "(next transition may determine resource status incorrectly)", ++ current_join_id, call_id, pcmk_strerror(rc)); + crm_log_xml_debug(msg, "failed"); + register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); + } +@@ -515,61 +541,75 @@ do_dc_join_ack(long long action, + + const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); +- crm_node_t *peer = crm_get_peer(0, join_from); ++ crm_node_t *peer = NULL; + +- if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) { +- crm_debug("Ignoring op=%s message from %s", op, join_from); ++ // Sanity checks ++ if (join_from == NULL) { ++ crm_warn("Ignoring message received without node identification"); ++ return; ++ } ++ if (op == NULL) { ++ crm_warn("Ignoring message received from %s without task", join_from); + return; + } + +- crm_trace("Processing ack from %s", join_from); +- crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); ++ if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { ++ crm_debug("Ignoring '%s' message from %s while waiting for '%s'", ++ op, join_from, CRM_OP_JOIN_CONFIRM); ++ return; ++ } + ++ if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { ++ crm_warn("Ignoring join confirmation from %s without valid join ID", ++ join_from); ++ return; ++ } ++ ++ peer = crm_get_peer(0, join_from); + if (peer->join != crm_join_finalized) { +- crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)", +- join_id, join_from, peer->join); ++ crm_info("Ignoring out-of-sequence join-%d confirmation from %s " ++ "(currently %s not %s)", ++ join_id, join_from, crm_join_phase_str(peer->join), ++ crm_join_phase_str(crm_join_finalized)); + 
return; ++ } + +- } else if (join_id != current_join_id) { +- crm_err("Invalid response from %s: join-%d vs. join-%d", +- join_from, join_id, current_join_id); ++ if (join_id != current_join_id) { ++ crm_err("Rejecting join-%d confirmation from %s " ++ "because currently on join-%d", ++ join_id, join_from, current_join_id); + crm_update_peer_join(__FUNCTION__, peer, crm_join_nack); + return; + } + + crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed); + +- crm_info("join-%d: Updating node state to %s for %s", +- join_id, CRMD_JOINSTATE_MEMBER, join_from); +- +- /* update CIB with the current LRM status from the node +- * We don't need to notify the TE of these updates, a transition will +- * be started in due time ++ /* Update CIB with node's current executor state. A new transition will be ++ * triggered later, when the CIB notifies us of the change. + */ + erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); +- + if (safe_str_eq(join_from, fsa_our_uname)) { + xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); + + if (now_dc_lrmd_state != NULL) { +- crm_debug("Local executor state updated from query"); + fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, + cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); + free_xml(now_dc_lrmd_state); ++ crm_debug("Updating local node history for join-%d " ++ "from query result (via CIB call %d)", join_id, call_id); + } else { +- crm_warn("Local executor state updated from join acknowledgement because query failed"); + fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, + cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); ++ crm_warn("Updating local node history from join-%d confirmation " ++ "because query failed (via CIB call %d)", join_id, call_id); + } + } else { +- crm_debug("Executor state for %s updated from join acknowledgement", +- join_from); + fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, + cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); ++ crm_debug("Updating node history for %s from join-%d confirmation " ++ "(via CIB call %d)", join_from, join_id, call_id); + } +- + fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback); +- crm_debug("join-%d: Registered callback for CIB status update %d", join_id, call_id); + } + + void +@@ -581,17 +621,16 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) + const char *join_to = join_node->uname; + + if(join_node->join != crm_join_integrated) { +- crm_trace("Skipping %s in state %d", join_to, join_node->join); ++ crm_trace("Not updating non-integrated node %s (%s) for join-%d", ++ join_to, crm_join_phase_str(join_node->join), ++ current_join_id); + return; + } + +- /* make sure a node entry exists for the new node */ +- crm_trace("Creating node entry for %s", join_to); +- ++ crm_trace("Updating node state for %s", join_to); + tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); + set_uuid(tmp1, XML_ATTR_UUID, join_node); + crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); +- + fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); + free_xml(tmp1); + +@@ -610,11 +649,10 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) + return; + } + +- /* send the ack/nack to the node */ +- acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); +- +- crm_debug("join-%d: ACK'ing join request from %s", ++ // Acknowledge node's join request ++ crm_debug("Acknowledging join-%d request from %s", + current_join_id, join_to); ++ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, 
join_to); + crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); + crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized); + crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); +@@ -629,11 +667,11 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source) + { + static unsigned long long highest_seq = 0; + +- crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); +- + if (saved_ccm_membership_id != crm_peer_seq) { +- crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)", +- source, saved_ccm_membership_id, crm_peer_seq, highest_seq); ++ crm_debug("join-%d: Membership changed from %llu to %llu " ++ CRM_XS " highest=%llu state=%s for=%s", ++ current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq, ++ fsa_state2string(cur_state), source); + if(highest_seq < crm_peer_seq) { + /* Don't spam the FSA with duplicates */ + highest_seq = crm_peer_seq; +@@ -642,34 +680,53 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source) + + } else if (cur_state == S_INTEGRATION) { + if (crmd_join_phase_count(crm_join_welcomed) == 0) { +- crm_debug("join-%d: Integration of %d peers complete: %s", +- current_join_id, crmd_join_phase_count(crm_join_integrated), source); ++ int count = crmd_join_phase_count(crm_join_integrated); ++ ++ crm_debug("join-%d: Integration of %d peer%s complete " ++ CRM_XS " state=%s for=%s", ++ current_join_id, count, pcmk__plural_s(count), ++ fsa_state2string(cur_state), source); + register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); + return TRUE; + } + + } else if (cur_state == S_FINALIZE_JOIN) { + if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { +- crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); ++ crm_debug("join-%d: Delaying finalization until we have CIB " ++ CRM_XS " state=%s for=%s", ++ current_join_id, fsa_state2string(cur_state), source); + return TRUE; + + } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { +- crm_debug("join-%d: Still waiting on %d welcomed nodes", +- current_join_id, crmd_join_phase_count(crm_join_welcomed)); ++ int count = crmd_join_phase_count(crm_join_welcomed); ++ ++ crm_debug("join-%d: Still waiting on %d welcomed node%s " ++ CRM_XS " state=%s for=%s", ++ current_join_id, count, pcmk__plural_s(count), ++ fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else if (crmd_join_phase_count(crm_join_integrated) != 0) { +- crm_debug("join-%d: Still waiting on %d integrated nodes", +- current_join_id, crmd_join_phase_count(crm_join_integrated)); ++ int count = crmd_join_phase_count(crm_join_integrated); ++ ++ crm_debug("join-%d: Still waiting on %d integrated node%s " ++ CRM_XS " state=%s for=%s", ++ current_join_id, count, pcmk__plural_s(count), ++ fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else if (crmd_join_phase_count(crm_join_finalized) != 0) { +- crm_debug("join-%d: Still waiting on %d finalized nodes", +- current_join_id, crmd_join_phase_count(crm_join_finalized)); ++ int count = crmd_join_phase_count(crm_join_finalized); ++ ++ crm_debug("join-%d: Still waiting on %d finalized node%s " ++ CRM_XS " state=%s for=%s", ++ current_join_id, count, pcmk__plural_s(count), ++ fsa_state2string(cur_state), source); + crmd_join_phase_log(LOG_DEBUG); + + } else { +- crm_debug("join-%d complete: %s", current_join_id, source); ++ crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", ++ current_join_id, 
fsa_state2string(cur_state), source); + register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); + return TRUE; + } +-- +1.8.3.1 + + +From 034b27734d05e8aeddb586f2daaede8314f9516f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 13 Dec 2019 10:39:34 -0600 +Subject: [PATCH 6/8] Log: controller: improve CIB status deletion messages + +--- + daemons/controld/controld_utils.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c +index 3acd488..bb8ace9 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -751,14 +751,18 @@ update_dc(xmlNode * msg) + return TRUE; + } + +-#define STATUS_PATH_MAX 512 + static void + erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) + { + char *xpath = user_data; + +- do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, +- "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc); ++ if (rc == 0) { ++ crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", ++ xpath, call_id); ++ } else { ++ crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " ++ CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); ++ } + } + + #define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" +@@ -766,14 +770,19 @@ erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + void + erase_status_tag(const char *uname, const char *tag, int options) + { +- if (fsa_cib_conn && uname) { ++ CRM_CHECK(uname != NULL, return); ++ ++ if (fsa_cib_conn == NULL) { ++ crm_warn("Unable to delete CIB '%s' section for node %s: " ++ "no CIB connection", tag, uname); ++ } else { + int call_id; + char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); + +- crm_info("Deleting %s status entries for %s " CRM_XS " xpath=%s", +- tag, uname, xpath); +- call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, +- cib_quorum_override | cib_xpath | options); ++ options |= cib_quorum_override|cib_xpath; ++ call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); ++ crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " ++ CRM_XS " xpath=%s", tag, uname, call_id, xpath); + fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); + // CIB library handles freeing xpath + } +-- +1.8.3.1 + + +From 73510818bc9905dcc130893198590b10c0067425 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 13 Dec 2019 10:36:56 -0600 +Subject: [PATCH 7/8] Refactor: controller: move erase_status_tag() to + controld_based.c + +--- + daemons/controld/controld_based.c | 38 ++++++++++++++++++++++++++++++++++++++ + daemons/controld/controld_utils.c | 37 ------------------------------------- + 2 files changed, 38 insertions(+), 37 deletions(-) + +diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c +index e6a4612..1db5650 100644 +--- a/daemons/controld/controld_based.c ++++ b/daemons/controld/controld_based.c +@@ -168,3 +168,41 @@ controld_action_is_recordable(const char *action) + } + return TRUE; + } ++ ++static void ++erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, ++ void *user_data) ++{ ++ char *xpath = user_data; ++ ++ if (rc == 0) { ++ crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", ++ xpath, call_id); ++ } else { ++ crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " ++ CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); ++ } ++} 
++ ++#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" ++ ++void ++erase_status_tag(const char *uname, const char *tag, int options) ++{ ++ CRM_CHECK(uname != NULL, return); ++ ++ if (fsa_cib_conn == NULL) { ++ crm_warn("Unable to delete CIB '%s' section for node %s: " ++ "no CIB connection", tag, uname); ++ } else { ++ int call_id; ++ char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); ++ ++ options |= cib_quorum_override|cib_xpath; ++ call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); ++ crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " ++ CRM_XS " xpath=%s", tag, uname, call_id, xpath); ++ fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); ++ // CIB library handles freeing xpath ++ } ++} +diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c +index bb8ace9..4ed6aeb 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -751,43 +751,6 @@ update_dc(xmlNode * msg) + return TRUE; + } + +-static void +-erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +-{ +- char *xpath = user_data; +- +- if (rc == 0) { +- crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", +- xpath, call_id); +- } else { +- crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " +- CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); +- } +-} +- +-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" +- +-void +-erase_status_tag(const char *uname, const char *tag, int options) +-{ +- CRM_CHECK(uname != NULL, return); +- +- if (fsa_cib_conn == NULL) { +- crm_warn("Unable to delete CIB '%s' section for node %s: " +- "no CIB connection", tag, uname); +- } else { +- int call_id; +- char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); +- +- options |= cib_quorum_override|cib_xpath; +- call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); +- crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " +- CRM_XS " xpath=%s", tag, uname, call_id, xpath); +- fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); +- // CIB library handles freeing xpath +- } +-} +- + void crmd_peer_down(crm_node_t *peer, bool full) + { + if(full && peer->state == NULL) { +-- +1.8.3.1 + + +From c4cc759e733db894957d039f65572cc21704224f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 13 Dec 2019 11:16:25 -0600 +Subject: [PATCH 8/8] Refactor: controller: improve efficiency when deleting + node state + +Rename erase_status_tag() to controld_delete_node_state() to follow current +naming practice. + +Instead of passing it a node_state subsection name, pass a new enum value +indicating what to erase (resource history, transient node attributes, or +both). This allows us to improve the log messages further, as well as improving +efficiency when both need to be cleared.
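[Editor's note, not part of the upstream patch: below is a minimal usage sketch of the API this commit message describes. It assumes the controld_section_e enum and the controld_delete_node_state() signature added by this patch, plus the controller daemon's own headers; the wrapper name example_node_cleanup() is hypothetical, not code from the patch.]

    /* Sketch: with the new API, one CIB request can erase both node_state
     * subsections, where erase_status_tag() previously needed two calls.
     */
    static void
    example_node_cleanup(const char *node_name, bool fenced)
    {
        if (fenced) {
            // After fencing, erase resource history and transient attributes
            controld_delete_node_state(node_name, controld_section_all,
                                       cib_scope_local);
        } else {
            // Normal leave: keep history, clear only transient attributes
            controld_delete_node_state(node_name, controld_section_attrs,
                                       cib_scope_local);
        }
    }

This mirrors the call sites updated later in the patch: send_stonith_update() passes controld_section_all after a successful fence, and peer_update_callback() passes controld_section_attrs when a node leaves.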
+--- + daemons/controld/controld_based.c | 69 +++++++++++++++++++++++++++-------- + daemons/controld/controld_callbacks.c | 8 +++- + daemons/controld/controld_execd.c | 3 +- + daemons/controld/controld_fencing.c | 5 +-- + daemons/controld/controld_join_dc.c | 3 +- + daemons/controld/controld_remote_ra.c | 24 ++++++------ + daemons/controld/controld_utils.h | 11 +++++- + 7 files changed, 87 insertions(+), 36 deletions(-) + +diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c +index 1db5650..008a02d 100644 +--- a/daemons/controld/controld_based.c ++++ b/daemons/controld/controld_based.c +@@ -170,39 +170,76 @@ controld_action_is_recordable(const char *action) + } + + static void +-erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, +- void *user_data) ++cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, ++ void *user_data) + { +- char *xpath = user_data; ++ char *desc = user_data; + + if (rc == 0) { +- crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", +- xpath, call_id); ++ crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); + } else { +- crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " +- CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); ++ crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", ++ desc, call_id, pcmk_strerror(rc), rc); + } + } + +-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" ++// Searches for various portions of node_state to delete + ++// Match a particular node's node_state (takes node name 1x) ++#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" ++ ++// Node's lrm section (name 1x) ++#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM ++ ++// Node's transient_attributes section (name 1x) ++#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS ++ ++// Everything under node_state (name 1x) ++#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" ++ ++/*! 
++ * \internal ++ * \brief Delete subsection of a node's CIB node_state ++ * ++ * \param[in] uname Desired node ++ * \param[in] section Subsection of node_state to delete ++ * \param[in] options CIB call options to use ++ */ + void +-erase_status_tag(const char *uname, const char *tag, int options) ++controld_delete_node_state(const char *uname, enum controld_section_e section, ++ int options) + { ++ char *xpath = NULL; ++ char *desc = NULL; ++ + CRM_CHECK(uname != NULL, return); ++ switch (section) { ++ case controld_section_lrm: ++ xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); ++ desc = crm_strdup_printf("resource history for node %s", uname); ++ break; ++ case controld_section_attrs: ++ xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); ++ desc = crm_strdup_printf("transient attributes for node %s", uname); ++ break; ++ case controld_section_all: ++ xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); ++ desc = crm_strdup_printf("all state for node %s", uname); ++ break; ++ } + + if (fsa_cib_conn == NULL) { +- crm_warn("Unable to delete CIB '%s' section for node %s: " +- "no CIB connection", tag, uname); ++ crm_warn("Unable to delete %s: no CIB connection", desc); ++ free(desc); + } else { + int call_id; +- char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); + + options |= cib_quorum_override|cib_xpath; + call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); +- crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " +- CRM_XS " xpath=%s", tag, uname, call_id, xpath); +- fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); +- // CIB library handles freeing xpath ++ crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", ++ desc, call_id, xpath); ++ fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback); ++ // CIB library handles freeing desc + } ++ free(xpath); + } +diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c +index 5cbd392..f7e3db2 100644 +--- a/daemons/controld/controld_callbacks.c ++++ b/daemons/controld/controld_callbacks.c +@@ -200,14 +200,18 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d + * transient attributes intact until it rejoins. 
+ */ + if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { +- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); ++ controld_delete_node_state(node->uname, ++ controld_section_attrs, ++ cib_scope_local); + } + + } else if(AM_I_DC) { + if (appeared) { + te_trigger_stonith_history_sync(FALSE); + } else { +- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); ++ controld_delete_node_state(node->uname, ++ controld_section_attrs, ++ cib_scope_local); + } + } + break; +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 46c1958..b7deeae 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1411,7 +1411,8 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys, + } + + /* Now delete the copy in the CIB */ +- erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); ++ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, ++ cib_scope_local); + + /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE + * would result in the scheduler sending us back here again +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index d9b1e1e..9897cf3 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -229,9 +229,8 @@ send_stonith_update(crm_action_t *action, const char *target, const char *uuid) + /* Make sure it sticks */ + /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ + +- erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local); +- erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); +- ++ controld_delete_node_state(peer->uname, controld_section_all, ++ cib_scope_local); + free_xml(node_state); + return; + } +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 54324b2..ac6b430 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -587,7 +587,8 @@ do_dc_join_ack(long long action, + /* Update CIB with node's current executor state. A new transition will be + * triggered later, when the CIB notifies us of the change. + */ +- erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); ++ controld_delete_node_state(join_from, controld_section_lrm, ++ cib_scope_local); + if (safe_str_eq(join_from, fsa_our_uname)) { + xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); + +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index 4fbae45..2d3dfa7 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -181,13 +181,13 @@ remote_node_up(const char *node_name) + CRM_CHECK(node_name != NULL, return); + crm_info("Announcing pacemaker_remote node %s", node_name); + +- /* Clear node's operation history. The node's transient attributes should +- * and normally will be cleared when the node leaves, but since remote node +- * state has a number of corner cases, clear them here as well, to be sure. ++ /* Clear node's entire state (resource history and transient attributes). ++ * The transient attributes should and normally will be cleared when the ++ * node leaves, but since remote node state has a number of corner cases, ++ * clear them here as well, to be sure. 
+ */ + call_opt = crmd_cib_smart_opt(); +- erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); +- erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); ++ controld_delete_node_state(node_name, controld_section_all, call_opt); + + /* Clear node's probed attribute */ + update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); +@@ -252,15 +252,15 @@ remote_node_down(const char *node_name, const enum down_opts opts) + /* Purge node from attrd's memory */ + update_attrd_remote_node_removed(node_name, NULL); + +- /* Purge node's transient attributes */ +- erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); +- +- /* Normally, the LRM operation history should be kept until the node comes +- * back up. However, after a successful fence, we want to clear it, so we +- * don't think resources are still running on the node. ++ /* Normally, only node attributes should be erased, and the resource history ++ * should be kept until the node comes back up. However, after a successful ++ * fence, we want to clear the history as well, so we don't think resources ++ * are still running on the node. + */ + if (opts == DOWN_ERASE_LRM) { +- erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); ++ controld_delete_node_state(node_name, controld_section_all, call_opt); ++ } else { ++ controld_delete_node_state(node_name, controld_section_attrs, call_opt); + } + + /* Ensure node is in the remote peer cache with lost state */ +diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h +index cf04f13..f902361 100644 +--- a/daemons/controld/controld_utils.h ++++ b/daemons/controld/controld_utils.h +@@ -70,7 +70,6 @@ xmlNode *create_node_state_update(crm_node_t *node, int flags, + xmlNode *parent, const char *source); + void populate_cib_nodes(enum node_update_flags flags, const char *source); + void crm_update_quorum(gboolean quorum, gboolean force_update); +-void erase_status_tag(const char *uname, const char *tag, int options); + void controld_close_attrd_ipc(void); + void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); + void update_attrd_remote_node_removed(const char *host, const char *user_name); +@@ -87,6 +86,16 @@ unsigned int cib_op_timeout(void); + bool feature_set_compatible(const char *dc_version, const char *join_version); + bool controld_action_is_recordable(const char *action); + ++// Subsections of node_state ++enum controld_section_e { ++ controld_section_lrm, ++ controld_section_attrs, ++ controld_section_all, ++}; ++ ++void controld_delete_node_state(const char *uname, ++ enum controld_section_e section, int options); ++ + const char *get_node_id(xmlNode *lrm_rsc_op); + + /* Convenience macro for registering a CIB callback +-- +1.8.3.1 + diff --git a/SOURCES/003-return-codes.patch b/SOURCES/003-return-codes.patch new file mode 100644 index 0000000..e4448af --- /dev/null +++ b/SOURCES/003-return-codes.patch @@ -0,0 +1,908 @@ +From 55ebd895ba2c64713c3db2590ffe22c15b8563e3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 13 Dec 2019 16:05:05 -0600 +Subject: [PATCH] Refactor: libcrmcommon: introduce new set of return codes + +Since we plan to introduce a high-level public API, it's a good time to +introduce some best practices. + +Most Pacemaker API functions currently return an integer return code, such that +its absolute value is either a system error number or a custom pcmk_err_* +number. 
This is less than ideal because system error numbers are constrained +only to the positive int range, so there's the possibility (though not noticed +in the wild) that system errors and custom errors could collide. + +The new method being introduced here still uses an integer return code, +but negative values are from a new enumeration, and positive values are +system error numbers. 0 still represents success. + +It is expected that the new method will be used with new functions, and +existing internal functions will be gradually refactored to use it as well. +Existing public API functions can be addressed at the next backward +compatibility break (2.1.0). +--- + include/crm/common/results.h | 59 ++++- + lib/common/results.c | 536 ++++++++++++++++++++++++++++++------------- + tools/crm_error.c | 100 +++++--- + 3 files changed, 493 insertions(+), 202 deletions(-) + +diff --git a/include/crm/common/results.h b/include/crm/common/results.h +index 7a32110..b29a016 100644 +--- a/include/crm/common/results.h ++++ b/include/crm/common/results.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2019 the Pacemaker project contributors ++ * Copyright 2012-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -49,11 +49,21 @@ extern "C" { + /* + * Function return codes + * ++ * Most Pacemaker API functions return an integer return code. There are two ++ * alternative interpretations. The legacy interpretation is that the absolute ++ * value of the return code is either a system error number or a custom ++ * pcmk_err_* number. This is less than ideal because system error numbers are ++ * constrained only to the positive int range, so there's the possibility ++ * (though not noticed in the wild) that system errors and custom errors could ++ * collide. The new interpretation is that negative values are from the pcmk_rc_e ++ * enum, and positive values are system error numbers. Both use 0 for success. ++ * + * For system error codes, see: + * - /usr/include/asm-generic/errno.h + * - /usr/include/asm-generic/errno-base.h + */ + ++// Legacy custom return codes for Pacemaker API functions (deprecated) + # define pcmk_ok 0 + # define PCMK_ERROR_OFFSET 190 /* Replacements on non-linux systems, see include/portability.h */ + # define PCMK_CUSTOM_OFFSET 200 /* Purely custom codes */ +@@ -75,6 +85,48 @@ extern "C" { + # define pcmk_err_bad_nvpair 216 + # define pcmk_err_unknown_format 217 + ++/*! ++ * \enum pcmk_rc_e ++ * \brief Return codes for Pacemaker API functions ++ * ++ * Any Pacemaker API function documented as returning a "standard Pacemaker ++ * return code" will return pcmk_rc_ok (0) on success, and one of this ++ * enumeration's other (negative) values or a (positive) system error number ++ * otherwise. The custom codes are at -1001 and lower, so that the caller may ++ * use -1 through -1000 for their own custom values if desired. While generally ++ * referred to as "errors", nonzero values simply indicate a result, which might ++ * or might not be an error depending on the calling context. ++ */ ++enum pcmk_rc_e { ++ /* When adding new values, use consecutively lower numbers, update the array ++ * in lib/common/results.c, and test with crm_error.
++ */ ++ pcmk_rc_no_quorum = -1017, ++ pcmk_rc_schema_validation = -1016, ++ pcmk_rc_schema_unchanged = -1015, ++ pcmk_rc_transform_failed = -1014, ++ pcmk_rc_old_data = -1013, ++ pcmk_rc_diff_failed = -1012, ++ pcmk_rc_diff_resync = -1011, ++ pcmk_rc_cib_modified = -1010, ++ pcmk_rc_cib_backup = -1009, ++ pcmk_rc_cib_save = -1008, ++ pcmk_rc_cib_corrupt = -1007, ++ pcmk_rc_multiple = -1006, ++ pcmk_rc_node_unknown = -1005, ++ pcmk_rc_already = -1004, ++ pcmk_rc_bad_nvpair = -1003, ++ pcmk_rc_unknown_format = -1002, ++ // Developers: Use a more specific code than pcmk_rc_error whenever possible ++ pcmk_rc_error = -1001, ++ ++ // Values -1 through -1000 reserved for caller use ++ ++ pcmk_rc_ok = 0 ++ ++ // Positive values reserved for system error numbers ++}; ++ + /* + * Exit status codes + * +@@ -150,6 +202,11 @@ typedef enum crm_exit_e { + CRM_EX_MAX = 255, // ensure crm_exit_t can hold this + } crm_exit_t; + ++const char *pcmk_rc_name(int rc); ++const char *pcmk_rc_str(int rc); ++crm_exit_t pcmk_rc2exitc(int rc); ++int pcmk_rc2legacy(int rc); ++int pcmk_legacy2rc(int legacy_rc); + const char *pcmk_strerror(int rc); + const char *pcmk_errorname(int rc); + const char *bz2_strerror(int rc); +diff --git a/lib/common/results.c b/lib/common/results.c +index b80191c..189648f 100644 +--- a/lib/common/results.c ++++ b/lib/common/results.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -22,148 +22,14 @@ + #include + #include + ++// @COMPAT Legacy function return codes ++ ++//! \deprecated Use standard return codes and pcmk_rc_name() instead + const char * + pcmk_errorname(int rc) + { +- int error = abs(rc); +- +- switch (error) { +- case E2BIG: return "E2BIG"; +- case EACCES: return "EACCES"; +- case EADDRINUSE: return "EADDRINUSE"; +- case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; +- case EAFNOSUPPORT: return "EAFNOSUPPORT"; +- case EAGAIN: return "EAGAIN"; +- case EALREADY: return "EALREADY"; +- case EBADF: return "EBADF"; +- case EBADMSG: return "EBADMSG"; +- case EBUSY: return "EBUSY"; +- case ECANCELED: return "ECANCELED"; +- case ECHILD: return "ECHILD"; +- case ECOMM: return "ECOMM"; +- case ECONNABORTED: return "ECONNABORTED"; +- case ECONNREFUSED: return "ECONNREFUSED"; +- case ECONNRESET: return "ECONNRESET"; +- /* case EDEADLK: return "EDEADLK"; */ +- case EDESTADDRREQ: return "EDESTADDRREQ"; +- case EDOM: return "EDOM"; +- case EDQUOT: return "EDQUOT"; +- case EEXIST: return "EEXIST"; +- case EFAULT: return "EFAULT"; +- case EFBIG: return "EFBIG"; +- case EHOSTDOWN: return "EHOSTDOWN"; +- case EHOSTUNREACH: return "EHOSTUNREACH"; +- case EIDRM: return "EIDRM"; +- case EILSEQ: return "EILSEQ"; +- case EINPROGRESS: return "EINPROGRESS"; +- case EINTR: return "EINTR"; +- case EINVAL: return "EINVAL"; +- case EIO: return "EIO"; +- case EISCONN: return "EISCONN"; +- case EISDIR: return "EISDIR"; +- case ELIBACC: return "ELIBACC"; +- case ELOOP: return "ELOOP"; +- case EMFILE: return "EMFILE"; +- case EMLINK: return "EMLINK"; +- case EMSGSIZE: return "EMSGSIZE"; +-#ifdef EMULTIHOP // Not available on OpenBSD +- case EMULTIHOP: return "EMULTIHOP"; +-#endif +- case ENAMETOOLONG: return "ENAMETOOLONG"; +- case ENETDOWN: return "ENETDOWN"; +- case ENETRESET: return "ENETRESET"; +- case ENETUNREACH: return "ENETUNREACH"; +- case ENFILE: return "ENFILE"; +- case ENOBUFS: return "ENOBUFS"; +- case ENODATA: return "ENODATA"; 
+- case ENODEV: return "ENODEV"; +- case ENOENT: return "ENOENT"; +- case ENOEXEC: return "ENOEXEC"; +- case ENOKEY: return "ENOKEY"; +- case ENOLCK: return "ENOLCK"; +-#ifdef ENOLINK // Not available on OpenBSD +- case ENOLINK: return "ENOLINK"; +-#endif +- case ENOMEM: return "ENOMEM"; +- case ENOMSG: return "ENOMSG"; +- case ENOPROTOOPT: return "ENOPROTOOPT"; +- case ENOSPC: return "ENOSPC"; +- case ENOSR: return "ENOSR"; +- case ENOSTR: return "ENOSTR"; +- case ENOSYS: return "ENOSYS"; +- case ENOTBLK: return "ENOTBLK"; +- case ENOTCONN: return "ENOTCONN"; +- case ENOTDIR: return "ENOTDIR"; +- case ENOTEMPTY: return "ENOTEMPTY"; +- case ENOTSOCK: return "ENOTSOCK"; +- /* case ENOTSUP: return "ENOTSUP"; */ +- case ENOTTY: return "ENOTTY"; +- case ENOTUNIQ: return "ENOTUNIQ"; +- case ENXIO: return "ENXIO"; +- case EOPNOTSUPP: return "EOPNOTSUPP"; +- case EOVERFLOW: return "EOVERFLOW"; +- case EPERM: return "EPERM"; +- case EPFNOSUPPORT: return "EPFNOSUPPORT"; +- case EPIPE: return "EPIPE"; +- case EPROTO: return "EPROTO"; +- case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; +- case EPROTOTYPE: return "EPROTOTYPE"; +- case ERANGE: return "ERANGE"; +- case EREMOTE: return "EREMOTE"; +- case EREMOTEIO: return "EREMOTEIO"; +- +- case EROFS: return "EROFS"; +- case ESHUTDOWN: return "ESHUTDOWN"; +- case ESPIPE: return "ESPIPE"; +- case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; +- case ESRCH: return "ESRCH"; +- case ESTALE: return "ESTALE"; +- case ETIME: return "ETIME"; +- case ETIMEDOUT: return "ETIMEDOUT"; +- case ETXTBSY: return "ETXTBSY"; +- case EUNATCH: return "EUNATCH"; +- case EUSERS: return "EUSERS"; +- /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ +- case EXDEV: return "EXDEV"; +- +-#ifdef EBADE +- /* Not available on OSX */ +- case EBADE: return "EBADE"; +- case EBADFD: return "EBADFD"; +- case EBADSLT: return "EBADSLT"; +- case EDEADLOCK: return "EDEADLOCK"; +- case EBADR: return "EBADR"; +- case EBADRQC: return "EBADRQC"; +- case ECHRNG: return "ECHRNG"; +-#ifdef EISNAM /* Not available on Illumos/Solaris */ +- case EISNAM: return "EISNAM"; +- case EKEYEXPIRED: return "EKEYEXPIRED"; +- case EKEYREJECTED: return "EKEYREJECTED"; +- case EKEYREVOKED: return "EKEYREVOKED"; +-#endif +- case EL2HLT: return "EL2HLT"; +- case EL2NSYNC: return "EL2NSYNC"; +- case EL3HLT: return "EL3HLT"; +- case EL3RST: return "EL3RST"; +- case ELIBBAD: return "ELIBBAD"; +- case ELIBMAX: return "ELIBMAX"; +- case ELIBSCN: return "ELIBSCN"; +- case ELIBEXEC: return "ELIBEXEC"; +-#ifdef ENOMEDIUM /* Not available on Illumos/Solaris */ +- case ENOMEDIUM: return "ENOMEDIUM"; +- case EMEDIUMTYPE: return "EMEDIUMTYPE"; +-#endif +- case ENONET: return "ENONET"; +- case ENOPKG: return "ENOPKG"; +- case EREMCHG: return "EREMCHG"; +- case ERESTART: return "ERESTART"; +- case ESTRPIPE: return "ESTRPIPE"; +-#ifdef EUCLEAN /* Not available on Illumos/Solaris */ +- case EUCLEAN: return "EUCLEAN"; +-#endif +- case EXFULL: return "EXFULL"; +-#endif +- ++ rc = abs(rc); ++ switch (rc) { + case pcmk_err_generic: return "pcmk_err_generic"; + case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; + case pcmk_err_schema_validation: return "pcmk_err_schema_validation"; +@@ -180,24 +46,26 @@ pcmk_errorname(int rc) + case pcmk_err_already: return "pcmk_err_already"; + case pcmk_err_bad_nvpair: return "pcmk_err_bad_nvpair"; + case pcmk_err_unknown_format: return "pcmk_err_unknown_format"; ++ default: return pcmk_rc_name(rc); // system errno + } +- return "Unknown"; + } + ++//! 
\deprecated Use standard return codes and pcmk_rc_str() instead + const char * + pcmk_strerror(int rc) + { +- int error = abs(rc); +- +- if (error == 0) { ++ if (rc == 0) { + return "OK"; ++ } + +- // Of course error > 0 ... unless someone passed INT_MIN as rc +- } else if ((error > 0) && (error < PCMK_ERROR_OFFSET)) { +- return strerror(error); ++ rc = abs(rc); ++ ++ // Of course rc > 0 ... unless someone passed INT_MIN as rc ++ if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) { ++ return strerror(rc); + } + +- switch (error) { ++ switch (rc) { + case pcmk_err_generic: + return "Generic Pacemaker error"; + case pcmk_err_no_quorum: +@@ -253,11 +121,313 @@ pcmk_strerror(int rc) + case ENOKEY: + return "Required key not available"; + } +- + crm_err("Unknown error code: %d", rc); + return "Unknown error"; + } + ++// Standard Pacemaker API return codes ++ ++/* This array is used only for nonzero values of pcmk_rc_e. Its values must be ++ * kept in the exact reverse order of the enum value numbering (i.e. add new ++ * values to the end of the array). ++ */ ++static struct pcmk__rc_info { ++ const char *name; ++ const char *desc; ++ int legacy_rc; ++} pcmk__rcs[] = { ++ { "pcmk_rc_error", ++ "Error", ++ -pcmk_err_generic, ++ }, ++ { "pcmk_rc_unknown_format", ++ "Unknown output format", ++ -pcmk_err_unknown_format, ++ }, ++ { "pcmk_rc_bad_nvpair", ++ "Bad name/value pair given", ++ -pcmk_err_bad_nvpair, ++ }, ++ { "pcmk_rc_already", ++ "Already in requested state", ++ -pcmk_err_already, ++ }, ++ { "pcmk_rc_node_unknown", ++ "Node not found", ++ -pcmk_err_node_unknown, ++ }, ++ { "pcmk_rc_multiple", ++ "Resource active on multiple nodes", ++ -pcmk_err_multiple, ++ }, ++ { "pcmk_rc_cib_corrupt", ++ "Could not parse on-disk configuration", ++ -pcmk_err_cib_corrupt, ++ }, ++ { "pcmk_rc_cib_save", ++ "Could not save new configuration to disk", ++ -pcmk_err_cib_save, ++ }, ++ { "pcmk_rc_cib_backup", ++ "Could not archive previous configuration", ++ -pcmk_err_cib_backup, ++ }, ++ { "pcmk_rc_cib_modified", ++ "On-disk configuration was manually modified", ++ -pcmk_err_cib_modified, ++ }, ++ { "pcmk_rc_diff_resync", ++ "Application of update diff failed, requesting full refresh", ++ -pcmk_err_diff_resync, ++ }, ++ { "pcmk_rc_diff_failed", ++ "Application of update diff failed", ++ -pcmk_err_diff_failed, ++ }, ++ { "pcmk_rc_old_data", ++ "Update was older than existing configuration", ++ -pcmk_err_old_data, ++ }, ++ { "pcmk_rc_transform_failed", ++ "Schema transform failed", ++ -pcmk_err_transform_failed, ++ }, ++ { "pcmk_rc_schema_unchanged", ++ "Schema is already the latest available", ++ -pcmk_err_schema_unchanged, ++ }, ++ { "pcmk_rc_schema_validation", ++ "Update does not conform to the configured schema", ++ -pcmk_err_schema_validation, ++ }, ++ { "pcmk_rc_no_quorum", ++ "Operation requires quorum", ++ -pcmk_err_no_quorum, ++ }, ++}; ++ ++#define PCMK__N_RC (sizeof(pcmk__rcs) / sizeof(struct pcmk__rc_info)) ++ ++/*! 
++ * \brief Get a return code constant name as a string ++ * ++ * \param[in] rc Integer return code to convert ++ * ++ * \return String of constant name corresponding to rc ++ */ ++const char * ++pcmk_rc_name(int rc) ++{ ++ if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < PCMK__N_RC)) { ++ return pcmk__rcs[pcmk_rc_error - rc].name; ++ } ++ switch (rc) { ++ case pcmk_rc_ok: return "pcmk_rc_ok"; ++ case E2BIG: return "E2BIG"; ++ case EACCES: return "EACCES"; ++ case EADDRINUSE: return "EADDRINUSE"; ++ case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; ++ case EAFNOSUPPORT: return "EAFNOSUPPORT"; ++ case EAGAIN: return "EAGAIN"; ++ case EALREADY: return "EALREADY"; ++ case EBADF: return "EBADF"; ++ case EBADMSG: return "EBADMSG"; ++ case EBUSY: return "EBUSY"; ++ case ECANCELED: return "ECANCELED"; ++ case ECHILD: return "ECHILD"; ++ case ECOMM: return "ECOMM"; ++ case ECONNABORTED: return "ECONNABORTED"; ++ case ECONNREFUSED: return "ECONNREFUSED"; ++ case ECONNRESET: return "ECONNRESET"; ++ /* case EDEADLK: return "EDEADLK"; */ ++ case EDESTADDRREQ: return "EDESTADDRREQ"; ++ case EDOM: return "EDOM"; ++ case EDQUOT: return "EDQUOT"; ++ case EEXIST: return "EEXIST"; ++ case EFAULT: return "EFAULT"; ++ case EFBIG: return "EFBIG"; ++ case EHOSTDOWN: return "EHOSTDOWN"; ++ case EHOSTUNREACH: return "EHOSTUNREACH"; ++ case EIDRM: return "EIDRM"; ++ case EILSEQ: return "EILSEQ"; ++ case EINPROGRESS: return "EINPROGRESS"; ++ case EINTR: return "EINTR"; ++ case EINVAL: return "EINVAL"; ++ case EIO: return "EIO"; ++ case EISCONN: return "EISCONN"; ++ case EISDIR: return "EISDIR"; ++ case ELIBACC: return "ELIBACC"; ++ case ELOOP: return "ELOOP"; ++ case EMFILE: return "EMFILE"; ++ case EMLINK: return "EMLINK"; ++ case EMSGSIZE: return "EMSGSIZE"; ++#ifdef EMULTIHOP // Not available on OpenBSD ++ case EMULTIHOP: return "EMULTIHOP"; ++#endif ++ case ENAMETOOLONG: return "ENAMETOOLONG"; ++ case ENETDOWN: return "ENETDOWN"; ++ case ENETRESET: return "ENETRESET"; ++ case ENETUNREACH: return "ENETUNREACH"; ++ case ENFILE: return "ENFILE"; ++ case ENOBUFS: return "ENOBUFS"; ++ case ENODATA: return "ENODATA"; ++ case ENODEV: return "ENODEV"; ++ case ENOENT: return "ENOENT"; ++ case ENOEXEC: return "ENOEXEC"; ++ case ENOKEY: return "ENOKEY"; ++ case ENOLCK: return "ENOLCK"; ++#ifdef ENOLINK // Not available on OpenBSD ++ case ENOLINK: return "ENOLINK"; ++#endif ++ case ENOMEM: return "ENOMEM"; ++ case ENOMSG: return "ENOMSG"; ++ case ENOPROTOOPT: return "ENOPROTOOPT"; ++ case ENOSPC: return "ENOSPC"; ++ case ENOSR: return "ENOSR"; ++ case ENOSTR: return "ENOSTR"; ++ case ENOSYS: return "ENOSYS"; ++ case ENOTBLK: return "ENOTBLK"; ++ case ENOTCONN: return "ENOTCONN"; ++ case ENOTDIR: return "ENOTDIR"; ++ case ENOTEMPTY: return "ENOTEMPTY"; ++ case ENOTSOCK: return "ENOTSOCK"; ++#if ENOTSUP != EOPNOTSUPP ++ case ENOTSUP: return "ENOTSUP"; ++#endif ++ case ENOTTY: return "ENOTTY"; ++ case ENOTUNIQ: return "ENOTUNIQ"; ++ case ENXIO: return "ENXIO"; ++ case EOPNOTSUPP: return "EOPNOTSUPP"; ++ case EOVERFLOW: return "EOVERFLOW"; ++ case EPERM: return "EPERM"; ++ case EPFNOSUPPORT: return "EPFNOSUPPORT"; ++ case EPIPE: return "EPIPE"; ++ case EPROTO: return "EPROTO"; ++ case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; ++ case EPROTOTYPE: return "EPROTOTYPE"; ++ case ERANGE: return "ERANGE"; ++ case EREMOTE: return "EREMOTE"; ++ case EREMOTEIO: return "EREMOTEIO"; ++ case EROFS: return "EROFS"; ++ case ESHUTDOWN: return "ESHUTDOWN"; ++ case ESPIPE: return "ESPIPE"; ++ case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; ++ 
case ESRCH: return "ESRCH"; ++ case ESTALE: return "ESTALE"; ++ case ETIME: return "ETIME"; ++ case ETIMEDOUT: return "ETIMEDOUT"; ++ case ETXTBSY: return "ETXTBSY"; ++ case EUNATCH: return "EUNATCH"; ++ case EUSERS: return "EUSERS"; ++ /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ ++ case EXDEV: return "EXDEV"; ++ ++#ifdef EBADE // Not available on OS X ++ case EBADE: return "EBADE"; ++ case EBADFD: return "EBADFD"; ++ case EBADSLT: return "EBADSLT"; ++ case EDEADLOCK: return "EDEADLOCK"; ++ case EBADR: return "EBADR"; ++ case EBADRQC: return "EBADRQC"; ++ case ECHRNG: return "ECHRNG"; ++#ifdef EISNAM // Not available on OS X, Illumos, Solaris ++ case EISNAM: return "EISNAM"; ++ case EKEYEXPIRED: return "EKEYEXPIRED"; ++ case EKEYREJECTED: return "EKEYREJECTED"; ++ case EKEYREVOKED: return "EKEYREVOKED"; ++#endif ++ case EL2HLT: return "EL2HLT"; ++ case EL2NSYNC: return "EL2NSYNC"; ++ case EL3HLT: return "EL3HLT"; ++ case EL3RST: return "EL3RST"; ++ case ELIBBAD: return "ELIBBAD"; ++ case ELIBMAX: return "ELIBMAX"; ++ case ELIBSCN: return "ELIBSCN"; ++ case ELIBEXEC: return "ELIBEXEC"; ++#ifdef ENOMEDIUM // Not available on OS X, Illumos, Solaris ++ case ENOMEDIUM: return "ENOMEDIUM"; ++ case EMEDIUMTYPE: return "EMEDIUMTYPE"; ++#endif ++ case ENONET: return "ENONET"; ++ case ENOPKG: return "ENOPKG"; ++ case EREMCHG: return "EREMCHG"; ++ case ERESTART: return "ERESTART"; ++ case ESTRPIPE: return "ESTRPIPE"; ++#ifdef EUCLEAN // Not available on OS X, Illumos, Solaris ++ case EUCLEAN: return "EUCLEAN"; ++#endif ++ case EXFULL: return "EXFULL"; ++#endif // EBADE ++ default: return "Unknown"; ++ } ++} ++ ++/*! ++ * \brief Get a user-friendly description of a return code ++ * ++ * \param[in] rc Integer return code to convert ++ * ++ * \return String description of rc ++ */ ++const char * ++pcmk_rc_str(int rc) ++{ ++ if (rc == pcmk_rc_ok) { ++ return "OK"; ++ } ++ if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < PCMK__N_RC)) { ++ return pcmk__rcs[pcmk_rc_error - rc].desc; ++ } ++ if (rc < 0) { ++ return "Unknown error"; ++ } ++ return strerror(rc); ++} ++ ++// This returns negative values for errors ++//! \deprecated Use standard return codes instead ++int ++pcmk_rc2legacy(int rc) ++{ ++ if (rc >= 0) { ++ return -rc; // OK or system errno ++ } ++ if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < PCMK__N_RC)) { ++ return pcmk__rcs[pcmk_rc_error - rc].legacy_rc; ++ } ++ return -pcmk_err_generic; ++} ++ ++//! 
\deprecated Use standard return codes instead ++int ++pcmk_legacy2rc(int legacy_rc) ++{ ++ legacy_rc = abs(legacy_rc); ++ switch (legacy_rc) { ++ case pcmk_err_no_quorum: return pcmk_rc_no_quorum; ++ case pcmk_err_schema_validation: return pcmk_rc_schema_validation; ++ case pcmk_err_schema_unchanged: return pcmk_rc_schema_unchanged; ++ case pcmk_err_transform_failed: return pcmk_rc_transform_failed; ++ case pcmk_err_old_data: return pcmk_rc_old_data; ++ case pcmk_err_diff_failed: return pcmk_rc_diff_failed; ++ case pcmk_err_diff_resync: return pcmk_rc_diff_resync; ++ case pcmk_err_cib_modified: return pcmk_rc_cib_modified; ++ case pcmk_err_cib_backup: return pcmk_rc_cib_backup; ++ case pcmk_err_cib_save: return pcmk_rc_cib_save; ++ case pcmk_err_cib_corrupt: return pcmk_rc_cib_corrupt; ++ case pcmk_err_multiple: return pcmk_rc_multiple; ++ case pcmk_err_node_unknown: return pcmk_rc_node_unknown; ++ case pcmk_err_already: return pcmk_rc_already; ++ case pcmk_err_bad_nvpair: return pcmk_rc_bad_nvpair; ++ case pcmk_err_unknown_format: return pcmk_rc_unknown_format; ++ case pcmk_err_generic: return pcmk_rc_error; ++ case pcmk_ok: return pcmk_rc_ok; ++ default: return legacy_rc; // system errno ++ } ++} ++ ++// Exit status codes ++ + const char * + crm_exit_name(crm_exit_t exit_code) + { +@@ -347,26 +517,17 @@ crm_exit_str(crm_exit_t exit_code) + case CRM_EX_TIMEOUT: return "Timeout occurred"; + case CRM_EX_MAX: return "Error occurred"; + } +- if (exit_code > 128) { ++ if ((exit_code > 128) && (exit_code < CRM_EX_MAX)) { + return "Interrupted by signal"; + } + return "Unknown exit status"; + } + +-/*! +- * \brief Map an errno to a similar exit status +- * +- * \param[in] errno Error number to map +- * +- * \return Exit status corresponding to errno +- */ ++//! \deprecated Use standard return codes and pcmk_rc2exitc() instead + crm_exit_t + crm_errno2exit(int rc) + { + rc = abs(rc); // Convenience for functions that return -errno +- if (rc == EOPNOTSUPP) { +- rc = ENOTSUP; // Values are same on Linux, can't use both in case +- } + switch (rc) { + case pcmk_ok: + return CRM_EX_OK; +@@ -384,6 +545,48 @@ crm_errno2exit(int rc) + case pcmk_err_bad_nvpair: + return CRM_EX_INVALID_PARAM; + ++ case pcmk_err_already: ++ return CRM_EX_EXISTS; ++ ++ case pcmk_err_multiple: ++ return CRM_EX_MULTIPLE; ++ ++ case pcmk_err_node_unknown: ++ case pcmk_err_unknown_format: ++ return CRM_EX_NOSUCH; ++ ++ default: ++ return pcmk_rc2exitc(rc); // system errno ++ } ++} ++ ++/*! 
++ * \brief Map a function return code to the most similar exit code ++ * ++ * \param[in] rc Function return code ++ * ++ * \return Most similar exit code ++ */ ++crm_exit_t ++pcmk_rc2exitc(int rc) ++{ ++ switch (rc) { ++ case pcmk_rc_ok: ++ return CRM_EX_OK; ++ ++ case pcmk_rc_no_quorum: ++ return CRM_EX_QUORUM; ++ ++ case pcmk_rc_old_data: ++ return CRM_EX_OLD; ++ ++ case pcmk_rc_schema_validation: ++ case pcmk_rc_transform_failed: ++ return CRM_EX_CONFIG; ++ ++ case pcmk_rc_bad_nvpair: ++ return CRM_EX_INVALID_PARAM; ++ + case EACCES: + return CRM_EX_INSUFFICIENT_PRIV; + +@@ -414,22 +617,25 @@ crm_errno2exit(int rc) + return CRM_EX_DISCONNECT; + + case EEXIST: +- case pcmk_err_already: ++ case pcmk_rc_already: + return CRM_EX_EXISTS; + + case EIO: + return CRM_EX_IOERR; + + case ENOTSUP: ++#if EOPNOTSUPP != ENOTSUP ++ case EOPNOTSUPP: ++#endif + return CRM_EX_UNIMPLEMENT_FEATURE; + + case ENOTUNIQ: +- case pcmk_err_multiple: ++ case pcmk_rc_multiple: + return CRM_EX_MULTIPLE; + + case ENXIO: +- case pcmk_err_node_unknown: +- case pcmk_err_unknown_format: ++ case pcmk_rc_node_unknown: ++ case pcmk_rc_unknown_format: + return CRM_EX_NOSUCH; + + case ETIME: +@@ -441,6 +647,8 @@ crm_errno2exit(int rc) + } + } + ++// Other functions ++ + const char * + bz2_strerror(int rc) + { +diff --git a/tools/crm_error.c b/tools/crm_error.c +index f6dc73c..0dcae05 100644 +--- a/tools/crm_error.c ++++ b/tools/crm_error.c +@@ -1,21 +1,10 @@ +-/* +- * Copyright 2012-2018 the Pacemaker project contributors ++/* ++ * Copyright 2012-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. +- * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+ */ + + #include +@@ -33,12 +22,31 @@ static struct crm_option long_options[] = { + "\n\t\t\tUseful for looking for sources of the error in source code"}, + + {"list", 0, 0, 'l', "\tShow all known errors."}, +- {"exit", 0, 0, 'X', "\tInterpret as exit code rather than function return value"}, ++ {"exit", 0, 0, 'X', "\tInterpret as exit code rather than legacy function return value"}, ++ {"rc", 0, 0, 'r', "\tInterpret as return code rather than legacy function return value"}, + + {0, 0, 0, 0} + }; + /* *INDENT-ON* */ + ++static bool as_exit_code = false; ++static bool as_rc = false; ++ ++static void ++get_strings(int rc, const char **name, const char **str) ++{ ++ if (as_exit_code) { ++ *str = crm_exit_str((crm_exit_t) rc); ++ *name = crm_exit_name(rc); ++ } else if (as_rc) { ++ *str = pcmk_rc_str(rc); ++ *name = pcmk_rc_name(rc); ++ } else { ++ *str = pcmk_strerror(rc); ++ *name = pcmk_errorname(rc); ++ } ++} ++ + int + main(int argc, char **argv) + { +@@ -49,10 +57,12 @@ main(int argc, char **argv) + + bool do_list = FALSE; + bool with_name = FALSE; +- bool as_exit_code = FALSE; ++ ++ const char *name = NULL; ++ const char *desc = NULL; + + crm_log_cli_init("crm_error"); +- crm_set_options(NULL, "[options] -- rc", long_options, ++ crm_set_options(NULL, "[options] -- [...]", long_options, + "Tool for displaying the textual name or description of a reported error code"); + + while (flag >= 0) { +@@ -73,6 +83,9 @@ main(int argc, char **argv) + case 'l': + do_list = TRUE; + break; ++ case 'r': ++ as_rc = true; ++ break; + case 'X': + as_exit_code = TRUE; + break; +@@ -83,30 +96,43 @@ main(int argc, char **argv) + } + + if(do_list) { +- for (rc = 0; rc < 256; rc++) { +- const char *name = as_exit_code? crm_exit_name(rc) : pcmk_errorname(rc); +- const char *desc = as_exit_code? crm_exit_str(rc) : pcmk_strerror(rc); ++ int start, end, width; ++ ++ // 256 is a hacky magic number that "should" be enough ++ if (as_rc) { ++ start = pcmk_rc_error - 256; ++ end = PCMK_CUSTOM_OFFSET; ++ width = 4; ++ } else { ++ start = 0; ++ end = 256; ++ width = 3; ++ } ++ ++ for (rc = start; rc < end; rc++) { ++ if (rc == (pcmk_rc_error + 1)) { ++ // Values in between are reserved for callers, no use iterating ++ rc = pcmk_rc_ok; ++ } ++ get_strings(rc, &name, &desc); + if (!name || !strcmp(name, "Unknown") || !strcmp(name, "CRM_EX_UNKNOWN")) { +- /* Unknown */ ++ // Undefined + } else if(with_name) { +- printf("%.3d: %-26s %s\n", rc, name, desc); ++ printf("% .*d: %-26s %s\n", width, rc, name, desc); + } else { +- printf("%.3d: %s\n", rc, desc); ++ printf("% .*d: %s\n", width, rc, desc); + } + } +- return CRM_EX_OK; +- } + +- for (lpc = optind; lpc < argc; lpc++) { +- const char *str, *name; +- +- rc = crm_atoi(argv[lpc], NULL); +- str = as_exit_code? crm_exit_str(rc) : pcmk_strerror(rc); +- if(with_name) { +- name = as_exit_code? 
crm_exit_name(rc) : pcmk_errorname(rc); +- printf("%s - %s\n", name, str); +- } else { +- printf("%s\n", str); ++ } else { ++ for (lpc = optind; lpc < argc; lpc++) { ++ rc = crm_atoi(argv[lpc], NULL); ++ get_strings(rc, &name, &desc); ++ if (with_name) { ++ printf("%s - %s\n", name, desc); ++ } else { ++ printf("%s\n", desc); ++ } + } + } + return CRM_EX_OK; +-- +1.8.3.1 + diff --git a/SOURCES/004-unused.patch b/SOURCES/004-unused.patch new file mode 100644 index 0000000..e732b42 --- /dev/null +++ b/SOURCES/004-unused.patch @@ -0,0 +1,159 @@ +From 6df10102c02f93890c1994136b3ce6a60b33a05e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 6 Jan 2020 11:01:38 -0600 +Subject: [PATCH] Refactor: controller: remove unused function arguments + +... and rename affected functions +--- + daemons/controld/controld_execd.c | 2 +- + daemons/controld/controld_fsa.c | 1 - + daemons/controld/controld_fsa.h | 4 ++-- + daemons/controld/controld_join_client.c | 4 ++-- + daemons/controld/controld_join_dc.c | 32 ++++++++++++++------------------ + 5 files changed, 19 insertions(+), 24 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index f068413..16751b9 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -839,7 +839,7 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) + } + + xmlNode * +-do_lrm_query(gboolean is_replace, const char *node_name) ++controld_query_executor_state(const char *node_name) + { + lrm_state_t *lrm_state = lrm_state_find(node_name); + +diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c +index bd732bc..db2b3f3 100644 +--- a/daemons/controld/controld_fsa.c ++++ b/daemons/controld/controld_fsa.c +@@ -41,7 +41,6 @@ enum crmd_fsa_state fsa_state = S_STARTING; + + extern uint highest_born_on; + extern uint num_join_invites; +-extern void initialize_join(gboolean before); + + #define DOT_PREFIX "actions:trace: " + #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) +diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h +index 06794cb..8aaaadf 100644 +--- a/daemons/controld/controld_fsa.h ++++ b/daemons/controld/controld_fsa.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -484,7 +484,7 @@ extern gboolean ever_had_quorum; + // These should be moved elsewhere + void do_update_cib_nodes(gboolean overwrite, const char *caller); + int crmd_cib_smart_opt(void); +-xmlNode *do_lrm_query(gboolean, const char *node_name); ++xmlNode *controld_query_executor_state(const char *node_name); + + const char *fsa_input2string(enum crmd_fsa_input input); + const char *fsa_state2string(enum crmd_fsa_state state); +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 4ac0d2a..383ee29 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -264,7 +264,7 @@ do_cl_join_finalize_respond(long long action, + update_dc_expected(input->msg); + + /* send our status section to the DC */ +- tmp1 = do_lrm_query(TRUE, fsa_our_uname); ++ tmp1 = controld_query_executor_state(fsa_our_uname); + if (tmp1 != NULL) { + xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index ac6b430..885b2a9 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -21,7 +21,6 @@ char *max_epoch = NULL; + char *max_generation_from = NULL; + xmlNode *max_generation_xml = NULL; + +-void initialize_join(gboolean before); + void finalize_join_for(gpointer key, gpointer value, gpointer user_data); + void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); + gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); +@@ -68,8 +67,8 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase + } + } + +-void +-initialize_join(gboolean before) ++static void ++start_join_round() + { + GHashTableIter iter; + crm_node_t *peer = NULL; +@@ -80,19 +79,16 @@ initialize_join(gboolean before) + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { + crm_update_peer_join(__FUNCTION__, peer, crm_join_none); + } +- +- if (before) { +- if (max_generation_from != NULL) { +- free(max_generation_from); +- max_generation_from = NULL; +- } +- if (max_generation_xml != NULL) { +- free_xml(max_generation_xml); +- max_generation_xml = NULL; +- } +- clear_bit(fsa_input_register, R_HAVE_CIB); +- clear_bit(fsa_input_register, R_CIB_ASKED); ++ if (max_generation_from != NULL) { ++ free(max_generation_from); ++ max_generation_from = NULL; ++ } ++ if (max_generation_xml != NULL) { ++ free_xml(max_generation_xml); ++ max_generation_xml = NULL; + } ++ clear_bit(fsa_input_register, R_HAVE_CIB); ++ clear_bit(fsa_input_register, R_CIB_ASKED); + } + + /*! +@@ -192,7 +188,7 @@ do_dc_join_offer_all(long long action, + * will be seen as offline by the scheduler anyway. + */ + current_join_id++; +- initialize_join(TRUE); ++ start_join_round(); + /* do_update_cib_nodes(TRUE, __FUNCTION__); */ + + update_dc(NULL); +@@ -590,7 +586,7 @@ do_dc_join_ack(long long action, + controld_delete_node_state(join_from, controld_section_lrm, + cib_scope_local); + if (safe_str_eq(join_from, fsa_our_uname)) { +- xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); ++ xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); + + if (now_dc_lrmd_state != NULL) { + fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, +-- +1.8.3.1 + diff --git a/SOURCES/005-shutdown-lock.patch b/SOURCES/005-shutdown-lock.patch new file mode 100644 index 0000000..9a4fe46 --- /dev/null +++ b/SOURCES/005-shutdown-lock.patch @@ -0,0 +1,207 @@ +From 4bdda97ff76d0e682f4f58bc632cd2cbd417c423 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 12:52:21 -0600 +Subject: [PATCH 01/18] Log: controller: improve messages when deleting CIB + resource history + +This also moves delete_rsc_status() to controld_based.c and renames it. 
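+
+For illustration, the XPath template introduced below expands as follows for a
+hypothetical node "node1" and resource "rsc1" (both names invented; the tag
+names are assumed to be the usual msg_xml.h values):
+
+    /* Assumed expansion of XPATH_RESOURCE_HISTORY; cib_internal_op() deletes
+     * whatever this query matches, because cib_xpath is set in call_options.
+     */
+    const char *xpath =
+        "//node_state[@uname='node1']/lrm/lrm_resources"
+        "/lrm_resource[@id='rsc1']";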
+--- + daemons/controld/controld_based.c | 71 +++++++++++++++++++++++++++++++++++++++ + daemons/controld/controld_execd.c | 47 +++++--------------------- + daemons/controld/controld_utils.h | 4 ++- + 3 files changed, 83 insertions(+), 39 deletions(-) + +diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c +index 42e321f..f3a7c4f 100644 +--- a/daemons/controld/controld_based.c ++++ b/daemons/controld/controld_based.c +@@ -243,3 +243,74 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, + } + free(xpath); + } ++ ++// Takes node name and resource ID ++#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ ++ "[@" XML_ATTR_UNAME "='%s'] /" \ ++ XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ ++ "/" XML_LRM_TAG_RESOURCE \ ++ "[@" XML_ATTR_ID "='%s']" ++// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks ++ ++/*! ++ * \internal ++ * \brief Clear resource history from CIB for a given resource and node ++ * ++ * \param[in] rsc_id ID of resource to be cleared ++ * \param[in] node Node whose resource history should be cleared ++ * \param[in] user_name ACL user name to use ++ * \param[in] call_options CIB call options ++ * ++ * \return Standard Pacemaker return code ++ */ ++int ++controld_delete_resource_history(const char *rsc_id, const char *node, ++ const char *user_name, int call_options) ++{ ++ char *desc = NULL; ++ char *xpath = NULL; ++ int rc = pcmk_rc_ok; ++ ++ CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); ++ ++ desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); ++ if (fsa_cib_conn == NULL) { ++ crm_err("Unable to clear %s: no CIB connection", desc); ++ free(desc); ++ return ENOTCONN; ++ } ++ ++ // Ask CIB to delete the entry ++ xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); ++ rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, xpath, NULL, ++ NULL, call_options|cib_xpath, user_name); ++ ++ if (rc < 0) { ++ rc = pcmk_legacy2rc(rc); ++ crm_err("Could not delete resource status of %s on %s%s%s: %s " ++ CRM_XS " rc=%d", rsc_id, node, ++ (user_name? " for user " : ""), (user_name? 
user_name : ""), ++ pcmk_rc_str(rc), rc); ++ free(desc); ++ free(xpath); ++ return rc; ++ } ++ ++ if (is_set(call_options, cib_sync_call)) { ++ if (is_set(call_options, cib_dryrun)) { ++ crm_debug("Deletion of %s would succeed", desc); ++ } else { ++ crm_debug("Deletion of %s succeeded", desc); ++ } ++ free(desc); ++ ++ } else { ++ crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", ++ desc, rc, xpath); ++ fsa_register_cib_callback(rc, FALSE, desc, cib_delete_callback); ++ // CIB library handles freeing desc ++ } ++ ++ free(xpath); ++ return pcmk_rc_ok; ++} +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 16751b9..212739e 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -36,8 +36,6 @@ struct delete_event_s { + static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); + static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); + static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); +-static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, +- const char *user_name); + + static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, + const char *rsc_id, const char *operation); +@@ -169,7 +167,8 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ + + if (op->rsc_deleted) { + crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); +- delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); ++ controld_delete_resource_history(op->rsc_id, lrm_state->node_name, ++ NULL, crmd_cib_smart_opt()); + return; + } + +@@ -917,31 +916,6 @@ lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) + return FALSE; + } + +-/* +- * Remove the rsc from the CIB +- * +- * Avoids refreshing the entire LRM section of this host +- */ +-#define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" +- +-static int +-delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, +- const char *user_name) +-{ +- char *rsc_xpath = NULL; +- int rc = pcmk_ok; +- +- CRM_CHECK(rsc_id != NULL, return -ENXIO); +- +- rsc_xpath = crm_strdup_printf(RSC_TEMPLATE, lrm_state->node_name, rsc_id); +- +- rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, +- NULL, NULL, call_options | cib_xpath, user_name); +- +- free(rsc_xpath); +- return rc; +-} +- + static void + delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, + GHashTableIter * rsc_gIter, int rc, const char *user_name) +@@ -958,7 +932,8 @@ delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rs + else + g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); + crm_debug("sync: Sending delete op for %s", rsc_id_copy); +- delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); ++ controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, ++ user_name, crmd_cib_smart_opt()); + + g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); + free(rsc_id_copy); +@@ -1694,21 +1669,17 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, + gboolean unregister = TRUE; + + #if ENABLE_ACL +- int cib_rc = delete_rsc_status(lrm_state, rsc->id, +- cib_dryrun|cib_sync_call, user_name); ++ int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, ++ user_name, ++ 
cib_dryrun|cib_sync_call); + +- if (cib_rc != pcmk_ok) { ++ if (cib_rc != pcmk_rc_ok) { + lrmd_event_data_t *op = NULL; + +- crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" +- CRM_XS " rc=%d", +- rsc->id, from_sys, (user_name? user_name : "unknown"), +- from_host, pcmk_strerror(cib_rc), cib_rc); +- + op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); + op->op_status = PCMK_LRM_OP_ERROR; + +- if (cib_rc == -EACCES) { ++ if (cib_rc == EACCES) { + op->rc = PCMK_OCF_INSUFFICIENT_PRIV; + } else { + op->rc = PCMK_OCF_UNKNOWN_ERROR; +diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h +index f902361..ca8cddb 100644 +--- a/daemons/controld/controld_utils.h ++++ b/daemons/controld/controld_utils.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -95,6 +95,8 @@ enum controld_section_e { + + void controld_delete_node_state(const char *uname, + enum controld_section_e section, int options); ++int controld_delete_resource_history(const char *rsc_id, const char *node, ++ const char *user_name, int call_options); + + const char *get_node_id(xmlNode *lrm_rsc_op); + +-- +1.8.3.1 + diff --git a/SOURCES/006-shutdown-lock.patch b/SOURCES/006-shutdown-lock.patch new file mode 100644 index 0000000..357a2e8 --- /dev/null +++ b/SOURCES/006-shutdown-lock.patch @@ -0,0 +1,252 @@ +From 3d8a7dc405e98cd8fe637d3e283bc0468d50bc71 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 15 Jan 2020 17:56:44 -0600 +Subject: [PATCH 02/18] Refactor: controller: functionize parts of resource + deletion notification + +... for future reuse +--- + daemons/controld/controld_execd.c | 116 +++++++++++++++++++++++++------------- + daemons/controld/controld_lrm.h | 11 +++- + 2 files changed, 88 insertions(+), 39 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 212739e..82f2bf1 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -42,9 +42,6 @@ static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op + static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + const char *operation, xmlNode *msg); + +-void send_direct_ack(const char *to_host, const char *to_sys, +- lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); +- + static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + int log_level); + static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); +@@ -278,7 +275,7 @@ send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, + + op->rc = PCMK_OCF_OK; + op->op_status = PCMK_LRM_OP_DONE; +- send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); ++ controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); + lrmd_free_event(op); + } + +@@ -850,6 +847,57 @@ controld_query_executor_state(const char *node_name) + node_update_cluster|node_update_peer); + } + ++/*! 
++ * \internal ++ * \brief Map standard Pacemaker return code to operation status and OCF code ++ * ++ * \param[out] event Executor event whose status and return code should be set ++ * \param[in] rc Standard Pacemaker return code ++ */ ++void ++controld_rc2event(lrmd_event_data_t *event, int rc) ++{ ++ switch (rc) { ++ case pcmk_rc_ok: ++ event->rc = PCMK_OCF_OK; ++ event->op_status = PCMK_LRM_OP_DONE; ++ break; ++ case EACCES: ++ event->rc = PCMK_OCF_INSUFFICIENT_PRIV; ++ event->op_status = PCMK_LRM_OP_ERROR; ++ break; ++ default: ++ event->rc = PCMK_OCF_UNKNOWN_ERROR; ++ event->op_status = PCMK_LRM_OP_ERROR; ++ break; ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Trigger a new transition after CIB status was deleted ++ * ++ * If a CIB status delete was not expected (as part of the transition graph), ++ * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" ++ * cluster property. ++ * ++ * \param[in] from_sys IPC name that requested the delete ++ * \param[in] rsc_id Resource whose status was deleted (for logging only) ++ */ ++void ++controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) ++{ ++ if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { ++ char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); ++ ++ crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); ++ update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, ++ NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, ++ FALSE, NULL, NULL); ++ free(now_s); ++ } ++} ++ + static void + notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) + { +@@ -860,33 +908,11 @@ notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_ + crm_info("Notifying %s on %s that %s was%s deleted", + from_sys, (from_host? from_host : "localhost"), rsc_id, + ((rc == pcmk_ok)? 
"" : " not")); +- + op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); +- +- if (rc == pcmk_ok) { +- op->op_status = PCMK_LRM_OP_DONE; +- op->rc = PCMK_OCF_OK; +- } else { +- op->op_status = PCMK_LRM_OP_ERROR; +- op->rc = PCMK_OCF_UNKNOWN_ERROR; +- } +- +- send_direct_ack(from_host, from_sys, NULL, op, rsc_id); ++ controld_rc2event(op, pcmk_legacy2rc(rc)); ++ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); + lrmd_free_event(op); +- +- if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { +- /* this isn't expected - trigger a new transition */ +- time_t now = time(NULL); +- char *now_s = crm_itoa(now); +- +- crm_debug("Triggering a refresh after %s deleted %s from the executor", +- from_sys, rsc_id); +- +- update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, +- "last-lrm-refresh", now_s, FALSE, NULL, NULL); +- +- free(now_s); +- } ++ controld_trigger_delete_refresh(from_sys, rsc_id); + } + + static gboolean +@@ -1495,7 +1521,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, + #if ENABLE_ACL + if (user_name && is_privileged(user_name) == FALSE) { + crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); +- send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); ++ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); + return; + } +@@ -1514,7 +1540,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, + crm_log_xml_warn(xml, "bad input"); + } + +- send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); ++ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); + } + +@@ -1684,7 +1710,7 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, + } else { + op->rc = PCMK_OCF_UNKNOWN_ERROR; + } +- send_direct_ack(from_host, from_sys, NULL, op, rsc->id); ++ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); + lrmd_free_event(op); + return; + } +@@ -2000,9 +2026,23 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons + return op; + } + ++/*! ++ * \internal ++ * \brief Send a (synthesized) event result ++ * ++ * Reply with a synthesized event result directly, as opposed to going through ++ * the executor. 
++ * ++ * \param[in] to_host Host to send result to ++ * \param[in] to_sys IPC name to send result to (NULL for transition engine) ++ * \param[in] rsc Type information about resource the result is for ++ * \param[in] op Event with result to send ++ * \param[in] rsc_id ID of resource the result is for ++ */ + void +-send_direct_ack(const char *to_host, const char *to_sys, +- lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) ++controld_ack_event_directly(const char *to_host, const char *to_sys, ++ lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, ++ const char *rsc_id) + { + xmlNode *reply = NULL; + xmlNode *update, *iter; +@@ -2221,7 +2261,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + + op->rc = PCMK_OCF_UNKNOWN_ERROR; + op->op_status = PCMK_LRM_OP_INVALID; +- send_direct_ack(NULL, NULL, rsc, op, rsc->id); ++ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); + lrmd_free_event(op); + free(op_id); + return; +@@ -2288,7 +2328,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); + op->rc = target_rc; + op->op_status = PCMK_LRM_OP_DONE; +- send_direct_ack(NULL, NULL, rsc, op, rsc->id); ++ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); + } + + pending->params = op->params; +@@ -2388,7 +2428,7 @@ do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data + + } else { + crm_warn("Resource %s no longer exists in the executor", op->rsc_id); +- send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); ++ controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); + goto cleanup; + } + +@@ -2660,7 +2700,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + } + + if (need_direct_ack) { +- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); ++ controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); + } + + if(remove == FALSE) { +diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h +index 3ab7048..7acac2a 100644 +--- a/daemons/controld/controld_lrm.h ++++ b/daemons/controld/controld_lrm.h +@@ -1,11 +1,13 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ ++#ifndef CONTROLD_LRM__H ++# define CONTROLD_LRM__H + + #include + #include +@@ -169,3 +171,10 @@ gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); + + void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + active_op_t *pending, xmlNode *action_xml); ++void controld_ack_event_directly(const char *to_host, const char *to_sys, ++ lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, ++ const char *rsc_id); ++void controld_rc2event(lrmd_event_data_t *event, int rc); ++void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id); ++ ++#endif +-- +1.8.3.1 + diff --git a/SOURCES/007-shutdown-lock.patch b/SOURCES/007-shutdown-lock.patch new file mode 100644 index 0000000..17e7588 --- /dev/null +++ b/SOURCES/007-shutdown-lock.patch @@ -0,0 +1,60 @@ +From f17c99492c7ab9e639b940a34d2a48b55937b605 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 16:00:36 -0600 +Subject: [PATCH 03/18] Low: tools: improve crm_resource "why" messages + +--- + tools/crm_resource_runtime.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 9ae24b6..61ceee7 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -878,7 +878,7 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, + void + cli_resource_check(cib_t * cib_conn, resource_t *rsc) + { +- int need_nl = 0; ++ bool printed = false; + char *role_s = NULL; + char *managed = NULL; + resource_t *parent = uber_parent(rsc); +@@ -897,23 +897,26 @@ cli_resource_check(cib_t * cib_conn, resource_t *rsc) + // Treated as if unset + + } else if(role == RSC_ROLE_STOPPED) { +- printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id); +- need_nl++; ++ printf("\n * Configuration specifies '%s' should remain stopped\n", ++ parent->id); ++ printed = true; + + } else if (is_set(parent->flags, pe_rsc_promotable) + && (role == RSC_ROLE_SLAVE)) { +- printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id); +- need_nl++; ++ printf("\n * Configuration specifies '%s' should not be promoted\n", ++ parent->id); ++ printed = true; + } + } + +- if(managed && crm_is_true(managed) == FALSE) { +- printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id); +- need_nl++; ++ if (managed && !crm_is_true(managed)) { ++ printf("%s * Configuration prevents cluster from stopping or starting unmanaged '%s'\n", ++ (printed? "" : "\n"), parent->id); ++ printed = true; + } + free(managed); + +- if(need_nl) { ++ if (printed) { + printf("\n"); + } + } +-- +1.8.3.1 + diff --git a/SOURCES/008-shutdown-lock.patch b/SOURCES/008-shutdown-lock.patch new file mode 100644 index 0000000..0592013 --- /dev/null +++ b/SOURCES/008-shutdown-lock.patch @@ -0,0 +1,122 @@ +From 736f255c18d4c99f1956fbb5ad4ac5bfc15bb841 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 16:23:25 -0600 +Subject: [PATCH 04/18] Low: tools: improve error checking for crm_resource + cleanup/fail commands + +Bail earlier for misconfigured resources, and return error (rather than hang) +for unknown or offline node. Also add timeout directly to controller request +rather than rely on the controller using the interval as default timeout. 
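+
+A minimal sketch of the validation order described above, reusing the helpers
+that the hunk below uses (the function name and the choice of return values
+are illustrative, not part of the patch):
+
+    #include <errno.h>              // EINVAL, ENOTCONN
+    #include <crm/crm.h>            // pcmk_ok, pcmk_err_node_unknown
+    #include <crm/pengine/status.h> // pe_working_set_t, pe_find_resource()
+
+    static int
+    check_cleanup_target(pe_working_set_t *data_set, const char *rsc_id,
+                         const char *host_uname)
+    {
+        pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
+        pe_node_t *node = NULL;
+
+        // Bail early for misconfigured or non-primitive resources
+        if ((rsc == NULL) || (rsc->variant != pe_native)) {
+            return -EINVAL;
+        }
+        // Fail, rather than hang, for unknown or offline nodes
+        node = pe_find_node(data_set->nodes, host_uname);
+        if (node == NULL) {
+            return -pcmk_err_node_unknown;
+        }
+        if (!node->details->online) {
+            return -ENOTCONN;
+        }
+        return pcmk_ok; // safe to build and send the controller request
+    }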
+--- + tools/crm_resource_runtime.c | 54 +++++++++++++++++++++++++++----------------- + 1 file changed, 33 insertions(+), 21 deletions(-) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 61ceee7..2ea8bb3 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -468,8 +468,9 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + int rc = -ECOMM; + xmlNode *cmd = NULL; + xmlNode *xml_rsc = NULL; +- const char *value = NULL; + const char *router_node = host_uname; ++ const char *rsc_class = NULL; ++ const char *rsc_type = NULL; + xmlNode *params = NULL; + xmlNode *msg_data = NULL; + resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); +@@ -481,27 +482,49 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + } else if (rsc->variant != pe_native) { + CMD_ERR("We can only process primitive resources, not %s", rsc_id); + return -EINVAL; ++ } + +- } else if (host_uname == NULL) { ++ rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); ++ rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE); ++ if ((rsc_class == NULL) || (rsc_type == NULL)) { ++ CMD_ERR("Resource %s does not have a class and type", rsc_id); ++ return -EINVAL; ++ } ++ ++ if (host_uname == NULL) { + CMD_ERR("Please specify a node name"); + return -EINVAL; ++ + } else { +- node_t *node = pe_find_node(data_set->nodes, host_uname); ++ pe_node_t *node = pe_find_node(data_set->nodes, host_uname); + ++ if (node == NULL) { ++ CMD_ERR("Node %s not found", host_uname); ++ return -pcmk_err_node_unknown; ++ } ++ ++ if (!(node->details->online)) { ++ CMD_ERR("Node %s is not online", host_uname); ++ return -ENOTCONN; ++ } + if (pe__is_guest_or_remote_node(node)) { + node = pe__current_node(node->details->remote_rsc); + if (node == NULL) { + CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", + host_uname); +- return -ENXIO; ++ return -ENOTCONN; + } + router_node = node->details->uname; + } + } + +- key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); +- + msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); ++ ++ /* The controller logs the transition key from requests, so we need to have ++ * *something* for it. ++ */ ++ key = generate_transition_key(0, getpid(), 0, ++ "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); + crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); + free(key); + +@@ -519,31 +542,20 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); + } + +- value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE); +- if (value == NULL) { +- CMD_ERR("%s has no type! Aborting...", rsc_id); +- return -ENXIO; +- } +- +- value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS); +- if (value == NULL) { +- CMD_ERR("%s has no class! 
Aborting...", rsc_id); +- return -ENXIO; +- } +- ++ crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, rsc_class); + crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); ++ crm_xml_add(xml_rsc, XML_ATTR_TYPE, rsc_type); + + params = create_xml_node(msg_data, XML_TAG_ATTRS); + crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + +- key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); ++ // The controller parses the timeout from the request ++ key = crm_meta_name(XML_ATTR_TIMEOUT); + crm_xml_add(params, key, "60000"); /* 1 minute */ + free(key); + + our_pid = crm_getpid_s(); + cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); +- +-/* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ + free_xml(msg_data); + + if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { +-- +1.8.3.1 + diff --git a/SOURCES/009-shutdown-lock.patch b/SOURCES/009-shutdown-lock.patch new file mode 100644 index 0000000..ff73598 --- /dev/null +++ b/SOURCES/009-shutdown-lock.patch @@ -0,0 +1,139 @@ +From 8a0e19a7702f61622d06b1c473fb3d9a5924c8f4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 18:07:18 -0600 +Subject: [PATCH 05/18] Refactor: liblrmd: new convenience function for + allocating lrmd_event_data_t + +--- + daemons/controld/controld_execd.c | 7 +------ + include/crm/lrmd.h | 2 ++ + lib/lrmd/lrmd_client.c | 34 +++++++++++++++++++++++++++++++++- + lib/pacemaker/pcmk_sched_transition.c | 7 +------ + lib/pacemaker/pcmk_trans_unpack.c | 9 +++------ + 5 files changed, 40 insertions(+), 19 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 82f2bf1..17cc8d6 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1878,15 +1878,10 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons + + CRM_ASSERT(rsc_id && operation); + +- op = calloc(1, sizeof(lrmd_event_data_t)); +- CRM_ASSERT(op != NULL); +- ++ op = lrmd_new_event(rsc_id, operation, 0); + op->type = lrmd_event_exec_complete; +- op->op_type = strdup(operation); + op->op_status = PCMK_LRM_OP_PENDING; + op->rc = -1; +- op->rsc_id = strdup(rsc_id); +- op->interval_ms = 0; + op->timeout = 0; + op->start_delay = 0; + +diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h +index cfa2925..3ad1f05 100644 +--- a/include/crm/lrmd.h ++++ b/include/crm/lrmd.h +@@ -248,6 +248,8 @@ typedef struct lrmd_event_data_s { + const char *exit_reason; + } lrmd_event_data_t; + ++lrmd_event_data_t *lrmd_new_event(const char *rsc_id, const char *task, ++ guint interval_ms); + lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); + void lrmd_free_event(lrmd_event_data_t * event); + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 2469c52..d16743d 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2012-2018 David Vossel ++ * Copyright 2012-2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +@@ -175,6 +177,36 @@ lrmd_key_value_freeall(lrmd_key_value_t * head) + } + } + ++/*! 
++ * Create a new lrmd_event_data_t object
++ *
++ * \param[in] rsc_id ID of resource involved in event
++ * \param[in] task Action name
++ * \param[in] interval_ms Action interval
++ *
++ * \return Newly allocated and initialized lrmd_event_data_t
++ * \note This function asserts on memory errors, so the return value is
++ * guaranteed to be non-NULL. The caller is responsible for freeing the
++ * result with lrmd_free_event().
++ */
++lrmd_event_data_t *
++lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms)
++{
++ lrmd_event_data_t *event = calloc(1, sizeof(lrmd_event_data_t));
++
++ CRM_ASSERT(event != NULL);
++ if (rsc_id != NULL) {
++ event->rsc_id = strdup(rsc_id);
++ CRM_ASSERT(event->rsc_id != NULL);
++ }
++ if (task != NULL) {
++ event->op_type = strdup(task);
++ CRM_ASSERT(event->op_type != NULL);
++ }
++ event->interval_ms = interval_ms;
++ return event;
++}
++
+ lrmd_event_data_t *
+ lrmd_copy_event(lrmd_event_data_t * event)
+ {
+diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
+index c415b75..1698c85 100644
+--- a/lib/pacemaker/pcmk_sched_transition.c
++++ b/lib/pacemaker/pcmk_sched_transition.c
+@@ -131,12 +131,7 @@ create_op(xmlNode *cib_resource, const char *task, guint interval_ms,
+ lrmd_event_data_t *op = NULL;
+ xmlNode *xop = NULL;
+
+- op = calloc(1, sizeof(lrmd_event_data_t));
+-
+- op->rsc_id = strdup(ID(cib_resource));
+- op->interval_ms = interval_ms;
+- op->op_type = strdup(task);
+-
++ op = lrmd_new_event(ID(cib_resource), task, interval_ms);
+ op->rc = outcome;
+ op->op_status = 0;
+ op->params = NULL; /* TODO: Fill me in */
+diff --git a/lib/pacemaker/pcmk_trans_unpack.c b/lib/pacemaker/pcmk_trans_unpack.c
+index e57f386..3e53289 100644
+--- a/lib/pacemaker/pcmk_trans_unpack.c
++++ b/lib/pacemaker/pcmk_trans_unpack.c
+@@ -298,12 +298,9 @@ convert_graph_action(xmlNode * resource, crm_action_t * action, int status, int
+ CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "Bad");
+ return NULL);
+
+- op = calloc(1, sizeof(lrmd_event_data_t));
+-
+- op->rsc_id = strdup(ID(action_resource));
+- op->interval_ms = action->interval_ms;
+- op->op_type = strdup(crm_element_value(action->xml, XML_LRM_ATTR_TASK));
+-
++ op = lrmd_new_event(ID(action_resource),
++ crm_element_value(action->xml, XML_LRM_ATTR_TASK),
++ action->interval_ms);
+ op->rc = rc;
+ op->op_status = status;
+ op->t_run = time(NULL);
+--
+1.8.3.1
+
diff --git a/SOURCES/010-shutdown-lock.patch b/SOURCES/010-shutdown-lock.patch
new file mode 100644
index 0000000..6304246
--- /dev/null
+++ b/SOURCES/010-shutdown-lock.patch
@@ -0,0 +1,129 @@
+From 50b0944c8add3f16b8190e75a6d06c3473c12a8f Mon Sep 17 00:00:00 2001
+From: Ken Gaillot
+Date: Thu, 21 Nov 2019 14:48:02 -0600
+Subject: [PATCH 06/18] Feature: scheduler: add shutdown lock cluster options
+
+This commit adds shutdown-lock and shutdown-lock-limit options (just the
+options, not the feature itself).
+
+shutdown-lock defaults to false, which preserves current behavior. The intended
+purpose of setting it to true is to *prevent* recovery of a node's resources
+elsewhere when the node is cleanly shut down, until the node rejoins. If
+shutdown-lock-limit is set to a nonzero time duration, the cluster will
+be allowed to recover the resources if the node has not rejoined within this
+time.
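+
+As a sketch of how the two options are consumed (mirroring the unpack_config()
+hunk at the end of this patch; the configured value here is invented):
+
+    const char *value = "5min"; // hypothetical shutdown-lock-limit setting
+    guint lock_limit_s = crm_parse_interval_spec(value) / 1000; // 300 seconds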
+ +The use case is when rebooting a node (such as for software updates) is done by +cluster-unaware system administrators during scheduled maintenance windows, +resources prefer specific nodes, and resource recovery time is high. +--- + include/crm/msg_xml.h | 4 +++- + include/crm/pengine/pe_types.h | 2 ++ + lib/pengine/common.c | 24 +++++++++++++++++++++++- + lib/pengine/unpack.c | 10 ++++++++++ + 4 files changed, 38 insertions(+), 2 deletions(-) + +diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h +index d56e40c..d0cdf6c 100644 +--- a/include/crm/msg_xml.h ++++ b/include/crm/msg_xml.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -346,6 +346,8 @@ extern "C" { + # define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation" + # define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval" + # define XML_CONFIG_ATTR_FENCE_REACTION "fence-reaction" ++# define XML_CONFIG_ATTR_SHUTDOWN_LOCK "shutdown-lock" ++# define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT "shutdown-lock-limit" + + # define XML_ALERT_ATTR_PATH "path" + # define XML_ALERT_ATTR_TIMEOUT "timeout" +diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h +index 23e1c46..8a735a3 100644 +--- a/include/crm/pengine/pe_types.h ++++ b/include/crm/pengine/pe_types.h +@@ -102,6 +102,7 @@ enum pe_find { + # define pe_flag_start_failure_fatal 0x00001000ULL + # define pe_flag_remove_after_stop 0x00002000ULL + # define pe_flag_startup_fencing 0x00004000ULL ++# define pe_flag_shutdown_lock 0x00008000ULL + + # define pe_flag_startup_probes 0x00010000ULL + # define pe_flag_have_status 0x00020000ULL +@@ -167,6 +168,7 @@ struct pe_working_set_s { + GList *stop_needed; // Containers that need stop actions + time_t recheck_by; // Hint to controller to re-run scheduler by this time + int ninstances; // Total number of resource instances ++ guint shutdown_lock;// How long (seconds) to lock resources to shutdown node + }; + + enum pe_check_parameters { +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index da39c99..e72a033 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2004-2018 Andrew Beekhof ++ * Copyright 2004-2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +@@ -85,6 +87,26 @@ static pe_cluster_option pe_opts[] = { + "When set to TRUE, the cluster will immediately ban a resource from a node if it fails to start there. When FALSE, the cluster will instead check the resource's fail count against its migration-threshold." }, + { "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean, + "Should the cluster check for active resources during startup", NULL }, ++ { ++ XML_CONFIG_ATTR_SHUTDOWN_LOCK, ++ NULL, "boolean", NULL, "false", &check_boolean, ++ "Whether to lock resources to a cleanly shut down node", ++ "When true, resources active on a node when it is cleanly shut down " ++ "are kept \"locked\" to that node (not allowed to run elsewhere) " ++ "until they start again on that node after it rejoins (or for at " ++ "most shutdown-lock-limit, if set). Stonith resources and " ++ "Pacemaker Remote connections are never locked. 
Clone and bundle " ++ "instances and the master role of promotable clones are currently " ++ "never locked, though support could be added in a future release." ++ }, ++ { ++ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, ++ NULL, "time", NULL, "0", &check_timer, ++ "Do not lock resources to a cleanly shut down node longer than this", ++ "If shutdown-lock is true and this is set to a nonzero time duration, " ++ "shutdown locks will expire after this much time has passed since " ++ "the shutdown was initiated, even if the node has not rejoined." ++ }, + + /* Stonith Options */ + { "stonith-enabled", NULL, "boolean", NULL, "true", &check_boolean, +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index c9fc672..8c0d72a 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -319,6 +319,16 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) + data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); + crm_trace("Placement strategy: %s", data_set->placement_strategy); + ++ set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock); ++ crm_trace("Resources will%s be locked to cleanly shut down nodes", ++ (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not")); ++ if (is_set(data_set->flags, pe_flag_shutdown_lock)) { ++ value = pe_pref(data_set->config_hash, ++ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); ++ data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000; ++ crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock); ++ } ++ + return TRUE; + } + +-- +1.8.3.1 + diff --git a/SOURCES/011-shutdown-lock.patch b/SOURCES/011-shutdown-lock.patch new file mode 100644 index 0000000..e9f1f5c --- /dev/null +++ b/SOURCES/011-shutdown-lock.patch @@ -0,0 +1,144 @@ +From f5d88938955f63935058b7cc2d706a12e6ea1121 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 6 Dec 2019 11:57:59 -0600 +Subject: [PATCH 07/18] Low: scheduler: respect shutdown locks when placing + active resources + +Use new pe_resource_t members to indicate that a resource is locked to a +particular node. + +For active resources (i.e. in the transition where the node is scheduled for +shutdown), these are connected by checking each lockable resource for whether +it is running on a single clean node that is shutting down. + +When applying constraints, place -INFINITY location constraints for locked +resources on all nodes other than the lock node. + +(Inactive resources -- i.e. in later transitions after the node is shut down -- +are not yet locked.) 
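+
+A worked example of the lock window this produces (timestamps invented; the
+arithmetic matches apply_shutdown_lock() in the hunk below):
+
+    time_t lock_time = 1000;    // effective shutdown time of the lock node
+    guint shutdown_lock = 300;  // shutdown-lock-limit, converted to seconds
+    time_t lock_expiration = lock_time + shutdown_lock; // lock lapses at 1300
+
+When shutdown-lock-limit is zero the lock never expires; otherwise
+pe__update_recheck_time() is given a time just past the expiry so that a new
+transition runs when the lock lapses.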
+--- + include/crm/pengine/pe_types.h | 2 + + lib/pacemaker/pcmk_sched_allocate.c | 87 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 89 insertions(+) + +diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h +index 8a735a3..123d8ef 100644 +--- a/include/crm/pengine/pe_types.h ++++ b/include/crm/pengine/pe_types.h +@@ -354,6 +354,8 @@ struct pe_resource_s { + GListPtr fillers; + + pe_node_t *pending_node; // Node on which pending_task is happening ++ pe_node_t *lock_node; // Resource is shutdown-locked to this node ++ time_t lock_time; // When shutdown lock started + + #if ENABLE_VERSIONED_ATTRS + xmlNode *versioned_parameters; +diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c +index fc2f4cf..0314f1b 100644 +--- a/lib/pacemaker/pcmk_sched_allocate.c ++++ b/lib/pacemaker/pcmk_sched_allocate.c +@@ -977,6 +977,87 @@ rsc_discover_filter(resource_t *rsc, node_t *node) + } + } + ++static time_t ++shutdown_time(pe_node_t *node, pe_working_set_t *data_set) ++{ ++ const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); ++ time_t result = 0; ++ ++ if (shutdown) { ++ errno = 0; ++ result = (time_t) crm_int_helper(shutdown, NULL); ++ if (errno != 0) { ++ result = 0; ++ } ++ } ++ return result? result : get_effective_time(data_set); ++} ++ ++static void ++apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) ++{ ++ const char *class; ++ ++ // Only primitives and (uncloned) groups may be locked ++ if (rsc->variant == pe_group) { ++ for (GList *item = rsc->children; item != NULL; ++ item = item->next) { ++ apply_shutdown_lock((pe_resource_t *) item->data, data_set); ++ } ++ } else if (rsc->variant != pe_native) { ++ return; ++ } ++ ++ // Fence devices and remote connections can't be locked ++ class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); ++ if ((class == NULL) || !strcmp(class, PCMK_RESOURCE_CLASS_STONITH) ++ || pe__resource_is_remote_conn(rsc, data_set)) { ++ return; ++ } ++ ++ // Only a resource active on exactly one node can be locked ++ if (pcmk__list_of_1(rsc->running_on)) { ++ pe_node_t *node = rsc->running_on->data; ++ ++ if (node->details->shutdown) { ++ if (node->details->unclean) { ++ pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", ++ rsc->id, node->details->uname); ++ } else { ++ rsc->lock_node = node; ++ rsc->lock_time = shutdown_time(node, data_set); ++ } ++ } ++ } ++ ++ if (rsc->lock_node == NULL) { ++ // No lock needed ++ return; ++ } ++ ++ if (data_set->shutdown_lock > 0) { ++ time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock; ++ ++ pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", ++ rsc->id, rsc->lock_node->details->uname, ++ (long long) lock_expiration); ++ pe__update_recheck_time(++lock_expiration, data_set); ++ } else { ++ pe_rsc_info(rsc, "Locking %s to %s due to shutdown", ++ rsc->id, rsc->lock_node->details->uname); ++ } ++ ++ // If resource is locked to one node, ban it from all other nodes ++ for (GList *item = data_set->nodes; item != NULL; item = item->next) { ++ pe_node_t *node = item->data; ++ ++ if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { ++ resource_location(rsc, node, -CRM_SCORE_INFINITY, ++ XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set); ++ } ++ } ++} ++ + /* + * Count how many valid nodes we have (so we know the maximum number of + * colors we can resolve). 
+@@ -988,6 +1069,12 @@ stage2(pe_working_set_t * data_set) + { + GListPtr gIter = NULL; + ++ if (is_set(data_set->flags, pe_flag_shutdown_lock)) { ++ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { ++ apply_shutdown_lock((pe_resource_t *) gIter->data, data_set); ++ } ++ } ++ + for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { + node_t *node = (node_t *) gIter->data; + +-- +1.8.3.1 + diff --git a/SOURCES/012-shutdown-lock.patch b/SOURCES/012-shutdown-lock.patch new file mode 100644 index 0000000..c700d96 --- /dev/null +++ b/SOURCES/012-shutdown-lock.patch @@ -0,0 +1,202 @@ +From 16f57bb79de4f88c2def174e3bb7d8ef312674cd Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 6 Dec 2019 12:17:03 -0600 +Subject: [PATCH 08/18] Low: scheduler: respect shutdown locks when placing + inactive resources + +When shutdown-lock is enabled, and we're either scheduling a resource stop +on a node that's cleanly shutting down or scheduling any action for a +previously locked resource, add "shutdown-lock=" to the +graph action. The controller will be able to use this to know when to preserve +the lock (by adding the lock time to the resource state entry). + +When the scheduler unpacks a resource state entry with a lock, it will remember +the lock node and lock time, which will trigger existing code for applying +shutdown locks. +--- + lib/pacemaker/pcmk_sched_allocate.c | 17 ++++++++++++- + lib/pacemaker/pcmk_sched_graph.c | 30 ++++++++++++++++++++++- + lib/pengine/unpack.c | 49 +++++++++++++++++++++++++++++++++---- + 3 files changed, 89 insertions(+), 7 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c +index 0314f1b..884e1bd 100644 +--- a/lib/pacemaker/pcmk_sched_allocate.c ++++ b/lib/pacemaker/pcmk_sched_allocate.c +@@ -1015,8 +1015,23 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) + return; + } + ++ if (rsc->lock_node != NULL) { ++ // The lock was obtained from resource history ++ ++ if (rsc->running_on != NULL) { ++ /* The resource was started elsewhere even though it is now ++ * considered locked. This shouldn't be possible, but as a ++ * failsafe, we don't want to disturb the resource now. ++ */ ++ pe_rsc_info(rsc, ++ "Cancelling shutdown lock because %s is already active", ++ rsc->id); ++ rsc->lock_node = NULL; ++ rsc->lock_time = 0; ++ } ++ + // Only a resource active on exactly one node can be locked +- if (pcmk__list_of_1(rsc->running_on)) { ++ } else if (pcmk__list_of_1(rsc->running_on)) { + pe_node_t *node = rsc->running_on->data; + + if (node->details->shutdown) { +diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c +index a6967fe..2861f3d 100644 +--- a/lib/pacemaker/pcmk_sched_graph.c ++++ b/lib/pacemaker/pcmk_sched_graph.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -988,6 +988,26 @@ add_downed_nodes(xmlNode *xml, const action_t *action, + } + } + ++static bool ++should_lock_action(pe_action_t *action) ++{ ++ // Only actions taking place on resource's lock node are locked ++ if ((action->rsc->lock_node == NULL) || (action->node == NULL) ++ || (action->node->details != action->rsc->lock_node->details)) { ++ return false; ++ } ++ ++ /* During shutdown, only stops are locked (otherwise, another action such as ++ * a demote would cause the controller to clear the lock) ++ */ ++ if (action->node->details->shutdown && action->task ++ && strcmp(action->task, RSC_STOP)) { ++ return false; ++ } ++ ++ return true; ++} ++ + static xmlNode * + action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) + { +@@ -1097,6 +1117,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) + XML_ATTR_TYPE + }; + ++ /* If a resource is locked to a node via shutdown-lock, mark its actions ++ * so the controller can preserve the lock when the action completes. ++ */ ++ if (should_lock_action(action)) { ++ crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, ++ (long long) action->rsc->lock_time); ++ } ++ + // List affected resource + + rsc_xml = create_xml_node(action_xml, +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8c0d72a..5139e60 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1059,7 +1060,8 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) + crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); + + } else if (!pe__is_guest_node(this_node) +- && rsc->role == RSC_ROLE_STARTED) { ++ && ((rsc->role == RSC_ROLE_STARTED) ++ || is_set(data_set->flags, pe_flag_shutdown_lock))) { + check = TRUE; + crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); + } +@@ -1075,6 +1077,9 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) + + } else if (fence) { + process = TRUE; ++ ++ } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) { ++ process = TRUE; + } + + if(process) { +@@ -2198,6 +2203,28 @@ calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) + } + } + ++// If resource history entry has shutdown lock, remember lock node and time ++static void ++unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, ++ pe_working_set_t *data_set) ++{ ++ time_t lock_time = 0; // When lock started (i.e. 
node shutdown time) ++ ++ if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK, ++ &lock_time) == pcmk_ok) && (lock_time != 0)) { ++ ++ if ((data_set->shutdown_lock > 0) ++ && (get_effective_time(data_set) ++ > (lock_time + data_set->shutdown_lock))) { ++ pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", ++ rsc->id, node->details->uname); ++ } else { ++ rsc->lock_node = node; ++ rsc->lock_time = lock_time; ++ } ++ } ++} ++ + static resource_t * + unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) + { +@@ -2234,18 +2261,30 @@ unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data + } + } + +- if (op_list == NULL) { +- /* if there are no operations, there is nothing to do */ +- return NULL; ++ if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) { ++ if (op_list == NULL) { ++ // If there are no operations, there is nothing to do ++ return NULL; ++ } + } + + /* find the resource */ + rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); + if (rsc == NULL) { +- rsc = process_orphan_resource(rsc_entry, node, data_set); ++ if (op_list == NULL) { ++ // If there are no operations, there is nothing to do ++ return NULL; ++ } else { ++ rsc = process_orphan_resource(rsc_entry, node, data_set); ++ } + } + CRM_ASSERT(rsc != NULL); + ++ // Check whether the resource is "shutdown-locked" to this node ++ if (is_set(data_set->flags, pe_flag_shutdown_lock)) { ++ unpack_shutdown_lock(rsc_entry, rsc, node, data_set); ++ } ++ + /* process operations */ + saved_role = rsc->role; + on_fail = action_fail_ignore; +-- +1.8.3.1 + diff --git a/SOURCES/013-shutdown-lock.patch b/SOURCES/013-shutdown-lock.patch new file mode 100644 index 0000000..4b9c91f --- /dev/null +++ b/SOURCES/013-shutdown-lock.patch @@ -0,0 +1,281 @@ +From 223ab7251adcb8c6f6b96def138be58b1478c42b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 22 Nov 2019 17:03:20 -0600 +Subject: [PATCH 09/18] Low: controller: mark shutdown-locked resources in + resource history + +When a graph action indicates that the resource should be shutdown-locked +to its node, remember the shutdown lock time in active_op_t so we can remember +that when the result comes back. When the result does come back, add +"shutdown-lock" to its lrm_resource entry in the CIB status section -- as +the timestamp if it's a successful stop or a probe finding the resource +inactive, or as 0 to clear the lock for any other operation. +--- + daemons/controld/controld_control.c | 9 ++++- + daemons/controld/controld_execd.c | 44 +++++++++++++++++++-- + daemons/controld/controld_lrm.h | 1 + + daemons/controld/controld_te_callbacks.c | 65 ++++++++++++++++++++++---------- + daemons/controld/controld_utils.h | 1 + + 5 files changed, 95 insertions(+), 25 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 6c7f97c..c918a1e 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -35,6 +35,7 @@ gboolean fsa_has_quorum = FALSE; + crm_trigger_t *fsa_source = NULL; + crm_trigger_t *config_read = NULL; + bool no_quorum_suicide_escalation = FALSE; ++bool controld_shutdown_lock_enabled = false; + + /* A_HA_CONNECT */ + void +@@ -587,7 +588,10 @@ static pe_cluster_option crmd_opts[] = { + { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, + "How many times stonith can fail before it will no longer be attempted on a target" + }, ++ ++ // Already documented in libpe_status (other values must be kept identical) + { "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, ++ { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", &check_boolean, NULL, NULL }, + }; + /* *INDENT-ON* */ + +@@ -698,6 +702,9 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + value = crmd_pref(config_hash, "join-finalization-timeout"); + finalization_timer->period_ms = crm_parse_interval_spec(value); + ++ value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK); ++ controld_shutdown_lock_enabled = crm_is_true(value); ++ + free(fsa_cluster_name); + fsa_cluster_name = NULL; + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 17cc8d6..c0436a2 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -44,7 +44,8 @@ static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + + static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + int log_level); +-static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); ++static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, ++ lrmd_event_data_t *op, time_t lock_time); + + static void + lrm_connection_destroy(void) +@@ -2171,7 +2172,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t + crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", + op->rsc_id, op->op_type, op->interval_ms, node_name); + +- do_update_resource(node_name, rsc, op); ++ do_update_resource(node_name, rsc, op, 0); + } + + static void +@@ -2313,6 +2314,10 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + pending->rsc_id = strdup(rsc->id); + pending->start_time = time(NULL); + pending->user_data = op->user_data? strdup(op->user_data) : NULL; ++ if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, ++ &(pending->lock_time)) != pcmk_ok) { ++ pending->lock_time = 0; ++ } + g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); + + if ((op->interval_ms > 0) +@@ -2356,8 +2361,28 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use + } + } + ++/* Only successful stops, and probes that found the resource inactive, get locks ++ * recorded in the history. This ensures the resource stays locked to the node ++ * until it is active there again after the node comes back up. 
++ */ ++static bool ++should_preserve_lock(lrmd_event_data_t *op) ++{ ++ if (!controld_shutdown_lock_enabled) { ++ return false; ++ } ++ if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { ++ return true; ++ } ++ if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { ++ return true; ++ } ++ return false; ++} ++ + static int +-do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) ++do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, ++ lrmd_event_data_t *op, time_t lock_time) + { + /* + +@@ -2412,6 +2437,16 @@ do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data + crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); + crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); + crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); ++ if (lock_time != 0) { ++ /* Actions on a locked resource should either preserve the lock by ++ * recording it with the action result, or clear it. ++ */ ++ if (!should_preserve_lock(op)) { ++ lock_time = 0; ++ } ++ crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK, ++ (long long) lock_time); ++ } + + if (op->params) { + container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); +@@ -2616,7 +2651,8 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + if (controld_action_is_recordable(op->op_type)) { + if (node_name && rsc) { + // We should record the result, and happily, we can +- update_id = do_update_resource(node_name, rsc, op); ++ update_id = do_update_resource(node_name, rsc, op, ++ pending? pending->lock_time : 0); + need_direct_ack = FALSE; + + } else if (op->rsc_deleted) { +diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h +index 7acac2a..da0582c 100644 +--- a/daemons/controld/controld_lrm.h ++++ b/daemons/controld/controld_lrm.h +@@ -46,6 +46,7 @@ typedef struct active_op_s { + int call_id; + uint32_t flags; // bitmask of active_op_e + time_t start_time; ++ time_t lock_time; + char *rsc_id; + char *op_type; + char *op_key; +diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c +index 25f0ab2..8506f26 100644 +--- a/daemons/controld/controld_te_callbacks.c ++++ b/daemons/controld/controld_te_callbacks.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
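Taken together, should_preserve_lock() and the do_update_resource() hunk above reduce to a simple rule for what gets written into the resource's history entry (attribute values illustrative):

    stop,    rc == PCMK_OCF_OK           ->  shutdown-lock="<lock time>"  (lock preserved)
    monitor, rc == PCMK_OCF_NOT_RUNNING  ->  shutdown-lock="<lock time>"  (lock preserved)
    any other recorded result            ->  shutdown-lock="0"            (lock cleared)

The attribute is written only when the action ran against a locked resource (lock_time != 0); history entries for unlocked resources carry no shutdown-lock attribute at all, which is what lets later patches in this series tell "never locked" apart from "explicitly cleared".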
+ * +@@ -28,6 +28,17 @@ crm_trigger_t *transition_trigger = NULL; + /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ + #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" + ++// An explicit shutdown-lock of 0 means the lock has been cleared ++static bool ++shutdown_lock_cleared(xmlNode *lrm_resource) ++{ ++ time_t shutdown_lock = 0; ++ ++ return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, ++ &shutdown_lock) == pcmk_ok) ++ && (shutdown_lock == 0); ++} ++ + static void + te_update_diff_v1(const char *event, xmlNode *diff) + { +@@ -106,33 +117,42 @@ te_update_diff_v1(const char *event, xmlNode *diff) + } + freeXpathObject(xpathObj); + ++ // Check for lrm_resource entries ++ xpathObj = xpath_search(diff, ++ "//" F_CIB_UPDATE_RESULT ++ "//" XML_TAG_DIFF_ADDED ++ "//" XML_LRM_TAG_RESOURCE); ++ max = numXpathResults(xpathObj); ++ + /* +- * Updates by, or in response to, TE actions will never contain updates +- * for more than one resource at a time, so such updates indicate an +- * LRM refresh. +- * +- * In that case, start a new transition rather than check each result +- * individually, which can result in _huge_ speedups in large clusters. ++ * Updates by, or in response to, graph actions will never affect more than ++ * one resource at a time, so such updates indicate an LRM refresh. In that ++ * case, start a new transition rather than check each result individually, ++ * which can result in _huge_ speedups in large clusters. + * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. 
+ */ +- if (transition_graph->pending == 0) { +- xpathObj = xpath_search(diff, +- "//" F_CIB_UPDATE_RESULT +- "//" XML_TAG_DIFF_ADDED +- "//" XML_LRM_TAG_RESOURCE); +- max = numXpathResults(xpathObj); +- if (max > 1) { +- crm_debug("Ignoring resource operation updates due to history refresh of %d resources", +- max); +- crm_log_xml_trace(diff, "lrm-refresh"); +- abort_transition(INFINITY, tg_restart, "History refresh", NULL); +- goto bail; ++ if ((transition_graph->pending == 0) && (max > 1)) { ++ crm_debug("Ignoring resource operation updates due to history refresh of %d resources", ++ max); ++ crm_log_xml_trace(diff, "lrm-refresh"); ++ abort_transition(INFINITY, tg_restart, "History refresh", NULL); ++ goto bail; ++ } ++ ++ if (max == 1) { ++ xmlNode *lrm_resource = getXpathResult(xpathObj, 0); ++ ++ if (shutdown_lock_cleared(lrm_resource)) { ++ // @TODO would be more efficient to abort once after transition done ++ abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", ++ lrm_resource); ++ // Still process results, so we stop timers and update failcounts + } +- freeXpathObject(xpathObj); + } ++ freeXpathObject(xpathObj); + + /* Process operation updates */ + xpathObj = +@@ -205,6 +225,11 @@ process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) + rsc_op = __xml_next(rsc_op)) { + process_graph_event(rsc_op, node); + } ++ if (shutdown_lock_cleared(lrm_resource)) { ++ // @TODO would be more efficient to abort once after transition done ++ abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", ++ lrm_resource); ++ } + } + + static void +diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h +index ca8cddb..8e31007 100644 +--- a/daemons/controld/controld_utils.h ++++ b/daemons/controld/controld_utils.h +@@ -41,6 +41,7 @@ fsa_cib_anon_update(const char *section, xmlNode *data) { + } + + extern gboolean fsa_has_quorum; ++extern bool controld_shutdown_lock_enabled; + extern int last_peer_update; + extern int last_resource_update; + +-- +1.8.3.1 + diff --git a/SOURCES/014-shutdown-lock.patch b/SOURCES/014-shutdown-lock.patch new file mode 100644 index 0000000..b464947 --- /dev/null +++ b/SOURCES/014-shutdown-lock.patch @@ -0,0 +1,158 @@ +From 8270e8aed46f6e672b94f00fe0bde07cd2b6ddd7 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 13 Dec 2019 11:38:49 -0600 +Subject: [PATCH 10/18] Low: controller: don't clear shutdown locks when node + rejoins + +Add new controld_delete_node_state() values for clearing resource history +while preserving shutdown locks. This is accomplished by deleting all +unlocked lrm_resource entries and all lrm_rsc_op entries, instead of the entire +lrm subsection. 
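To make the XPath unions concrete: assuming XPATH_NODE_STATE (defined earlier in this file, not shown in the patch) selects the node_state entry for a given uname, the XPATH_NODE_LRM_UNLOCKED template defined below expands for a node named node1 to roughly

    //node_state[@uname='node1']//lrm_rsc_op
    | //node_state[@uname='node1']//lrm_resource[not(@shutdown-lock)]

The "(name 2x)"/"(name 3x)" comments track how many times the node name must be passed to crm_strdup_printf() for each template, and because a union like this can match many elements, the delete request now also sets cib_multiple in the CIB call options.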
+--- + daemons/controld/controld_based.c | 22 +++++++++++++++++++++- + daemons/controld/controld_join_dc.c | 7 +++++-- + daemons/controld/controld_remote_ra.c | 16 ++++++++++------ + daemons/controld/controld_utils.h | 2 ++ + 4 files changed, 38 insertions(+), 9 deletions(-) + +diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c +index f3a7c4f..0ffc1e8 100644 +--- a/daemons/controld/controld_based.c ++++ b/daemons/controld/controld_based.c +@@ -191,12 +191,21 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, + // Node's lrm section (name 1x) + #define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM + ++// Node's lrm_rsc_op entries and lrm_resource entries without lock (name 2x) ++#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ ++ "|" XPATH_NODE_STATE \ ++ "//" XML_LRM_TAG_RESOURCE \ ++ "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ")]" ++ + // Node's transient_attributes section (name 1x) + #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS + + // Everything under node_state (name 1x) + #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" + ++// Unlocked history + transient attributes (name 3x) ++#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS ++ + /*! + * \internal + * \brief Delete subsection of a node's CIB node_state +@@ -218,6 +227,11 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, + xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); + desc = crm_strdup_printf("resource history for node %s", uname); + break; ++ case controld_section_lrm_unlocked: ++ xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname); ++ desc = crm_strdup_printf("resource history (other than shutdown " ++ "locks) for node %s", uname); ++ break; + case controld_section_attrs: + xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); + desc = crm_strdup_printf("transient attributes for node %s", uname); +@@ -226,6 +240,12 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, + xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); + desc = crm_strdup_printf("all state for node %s", uname); + break; ++ case controld_section_all_unlocked: ++ xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, ++ uname, uname, uname); ++ desc = crm_strdup_printf("all state (other than shutdown locks) " ++ "for node %s", uname); ++ break; + } + + if (fsa_cib_conn == NULL) { +@@ -234,7 +254,7 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, + } else { + int call_id; + +- options |= cib_quorum_override|cib_xpath; ++ options |= cib_quorum_override|cib_xpath|cib_multiple; + call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); + crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, call_id, xpath); +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 885b2a9..f0eb2a2 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -534,6 +534,7 @@ do_dc_join_ack(long long action, + int join_id = -1; + int call_id = 0; + ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); ++ enum controld_section_e section = controld_section_lrm; + + const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); + const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); +@@ -583,8 +584,10 @@ do_dc_join_ack(long long action, + /* Update CIB with node's current executor state. 
A new transition will be + * triggered later, when the CIB notifies us of the change. + */ +- controld_delete_node_state(join_from, controld_section_lrm, +- cib_scope_local); ++ if (controld_shutdown_lock_enabled) { ++ section = controld_section_lrm_unlocked; ++ } ++ controld_delete_node_state(join_from, section, cib_scope_local); + if (safe_str_eq(join_from, fsa_our_uname)) { + xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); + +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index 2d3dfa7..a81c354 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2013-2019 the Pacemaker project contributors ++ * Copyright 2013-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -177,17 +177,21 @@ remote_node_up(const char *node_name) + int call_opt, call_id = 0; + xmlNode *update, *state; + crm_node_t *node; ++ enum controld_section_e section = controld_section_all; + + CRM_CHECK(node_name != NULL, return); + crm_info("Announcing pacemaker_remote node %s", node_name); + +- /* Clear node's entire state (resource history and transient attributes). +- * The transient attributes should and normally will be cleared when the +- * node leaves, but since remote node state has a number of corner cases, +- * clear them here as well, to be sure. ++ /* Clear node's entire state (resource history and transient attributes) ++ * other than shutdown locks. The transient attributes should and normally ++ * will be cleared when the node leaves, but since remote node state has a ++ * number of corner cases, clear them here as well, to be sure. + */ + call_opt = crmd_cib_smart_opt(); +- controld_delete_node_state(node_name, controld_section_all, call_opt); ++ if (controld_shutdown_lock_enabled) { ++ section = controld_section_all_unlocked; ++ } ++ controld_delete_node_state(node_name, section, call_opt); + + /* Clear node's probed attribute */ + update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); +diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h +index 8e31007..5549636 100644 +--- a/daemons/controld/controld_utils.h ++++ b/daemons/controld/controld_utils.h +@@ -90,8 +90,10 @@ bool controld_action_is_recordable(const char *action); + // Subsections of node_state + enum controld_section_e { + controld_section_lrm, ++ controld_section_lrm_unlocked, + controld_section_attrs, + controld_section_all, ++ controld_section_all_unlocked + }; + + void controld_delete_node_state(const char *uname, +-- +1.8.3.1 + diff --git a/SOURCES/015-shutdown-lock.patch b/SOURCES/015-shutdown-lock.patch new file mode 100644 index 0000000..364b2aa --- /dev/null +++ b/SOURCES/015-shutdown-lock.patch @@ -0,0 +1,38 @@ +From d70d90367c898bcb62fd6c7dd8d641ca56be04ae Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 20 Dec 2019 11:46:37 -0600 +Subject: [PATCH 11/18] Low: scheduler: display when a resource is + shutdown-locked to a node + +... 
so it shows up in logs and cluster status displays +--- + lib/pengine/native.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index b064115..5a6fd80 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -541,6 +541,9 @@ native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node, + provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); + } + ++ if ((node == NULL) && (rsc->lock_node != NULL)) { ++ node = rsc->lock_node; ++ } + if (is_set(options, pe_print_rsconly) + || pcmk__list_of_multiple(rsc->running_on)) { + node = NULL; +@@ -583,6 +586,9 @@ native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node, + if (node && !(node->details->online) && node->details->unclean) { + have_flags = add_output_flag(outstr, "UNCLEAN", have_flags); + } ++ if (node && (node == rsc->lock_node)) { ++ have_flags = add_output_flag(outstr, "LOCKED", have_flags); ++ } + if (is_set(options, pe_print_pending)) { + const char *pending_task = native_pending_task(rsc); + +-- +1.8.3.1 + diff --git a/SOURCES/016-shutdown-lock.patch b/SOURCES/016-shutdown-lock.patch new file mode 100644 index 0000000..b8f8e5d --- /dev/null +++ b/SOURCES/016-shutdown-lock.patch @@ -0,0 +1,29 @@ +From bc9c07951cb9c411324056b4d5322016153fee20 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 16:01:16 -0600 +Subject: [PATCH 12/18] Low: tools: crm_resource resource checks should show + shutdown locks + +--- + tools/crm_resource_runtime.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 2ea8bb3..ed5fb03 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -928,6 +928,11 @@ cli_resource_check(cib_t * cib_conn, resource_t *rsc) + } + free(managed); + ++ if (rsc->lock_node) { ++ printf("%s * '%s' is locked to node %s due to shutdown\n", ++ (printed? "" : "\n"), parent->id, rsc->lock_node->details->uname); ++ } ++ + if (printed) { + printf("\n"); + } +-- +1.8.3.1 + diff --git a/SOURCES/017-shutdown-lock.patch b/SOURCES/017-shutdown-lock.patch new file mode 100644 index 0000000..8dc7dd9 --- /dev/null +++ b/SOURCES/017-shutdown-lock.patch @@ -0,0 +1,191 @@ +From 45a6f0b051743c266c13f3ffd365baf3a9d730f6 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 12:53:39 -0600 +Subject: [PATCH 13/18] Low: controller: allow CRM_OP_LRM_DELETE to clear CIB + only + +Normally, CRM_OP_LRM_DELETE is relayed to the affected node's controller, which +clears the resource from the executor and CIB as well the its own bookkeeping. + +Now, we want to be able to use it to clear shutdown locks for nodes that are +down. Let it take a new "mode" attribute, and if it is "cib", clear the +resource from the CIB locally without relaying the operation or doing anything +else. 
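The two display changes above surface locks directly to administrators; the regression-test summaries added later in this series show the resulting status line, and the crm_resource check output follows the printf() format just patched in (resource and node names illustrative):

    rsc2    (ocf::pacemaker:Dummy): Stopped node2 (LOCKED)
     * 'rsc2' is locked to node node2 due to shutdown

As for the CIB-only delete described in the commit message above, marking a CRM_OP_LRM_DELETE this way is a single extra attribute on the operation's data, which a later patch in this series adds from crm_resource (XML_TAG_CIB is the string "cib"):

    crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB);  /* mode="cib" */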
+--- + daemons/controld/controld_execd.c | 4 +- + daemons/controld/controld_messages.c | 97 ++++++++++++++++++++++++++++++++-- + daemons/controld/controld_te_actions.c | 7 +++ + include/crm_internal.h | 2 + + 4 files changed, 106 insertions(+), 4 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index c0436a2..8d25fb8 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1769,7 +1769,9 @@ do_lrm_invoke(long long action, + crm_trace("Executor %s command from %s", crm_op, from_sys); + + if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { +- crm_rsc_delete = TRUE; // Only crm_resource uses this op ++ if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { ++ crm_rsc_delete = TRUE; // from crm_resource ++ } + operation = CRMD_ACTION_DELETE; + + } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 466c64c..689e4a0 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -410,6 +410,14 @@ relay_message(xmlNode * msg, gboolean originated_locally) + + } else if (safe_str_eq(fsa_our_uname, host_to)) { + is_local = 1; ++ } else if (is_for_crm && safe_str_eq(task, CRM_OP_LRM_DELETE)) { ++ xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA); ++ const char *mode = crm_element_value(msg_data, PCMK__XA_MODE); ++ ++ if (safe_str_eq(mode, XML_TAG_CIB)) { ++ // Local delete of an offline node's resource history ++ is_local = 1; ++ } + } + + if (is_for_dc || is_for_dcib || is_for_te) { +@@ -654,6 +662,86 @@ handle_failcount_op(xmlNode * stored_msg) + return I_NULL; + } + ++static enum crmd_fsa_input ++handle_lrm_delete(xmlNode *stored_msg) ++{ ++ const char *mode = NULL; ++ xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA); ++ ++ CRM_CHECK(msg_data != NULL, return I_NULL); ++ ++ /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to ++ * relay the operation to the affected node, which will unregister the ++ * resource from the local executor, clear the resource's history from the ++ * CIB, and do some bookkeeping in the controller. ++ * ++ * However, if the affected node is offline, the client will specify ++ * mode="cib" which means the controller receiving the operation should ++ * clear the resource's history from the CIB and nothing else. This is used ++ * to clear shutdown locks. ++ */ ++ mode = crm_element_value(msg_data, PCMK__XA_MODE); ++ if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) { ++ // Relay to affected node ++ crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); ++ return I_ROUTER; ++ ++ } else { ++ // Delete CIB history locally (compare with do_lrm_delete()) ++ const char *from_sys = NULL; ++ const char *user_name = NULL; ++ const char *rsc_id = NULL; ++ const char *node = NULL; ++ xmlNode *rsc_xml = NULL; ++ int rc = pcmk_rc_ok; ++ ++ rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE); ++ CRM_CHECK(rsc_xml != NULL, return I_NULL); ++ ++ rsc_id = ID(rsc_xml); ++ from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM); ++ node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET); ++#if ENABLE_ACL ++ user_name = crm_acl_get_set_user(stored_msg, F_CRM_USER, NULL); ++#endif ++ crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s " ++ "(clearing CIB resource history only)", rsc_id, node, ++ (user_name? " for user " : ""), (user_name? 
user_name : "")); ++#if ENABLE_ACL ++ rc = controld_delete_resource_history(rsc_id, node, user_name, ++ cib_dryrun|cib_sync_call); ++#endif ++ if (rc == pcmk_rc_ok) { ++ rc = controld_delete_resource_history(rsc_id, node, user_name, ++ crmd_cib_smart_opt()); ++ } ++ ++ // Notify client if not from graph (compare with notify_deleted()) ++ if (from_sys && strcmp(from_sys, CRM_SYSTEM_TENGINE)) { ++ lrmd_event_data_t *op = NULL; ++ const char *from_host = crm_element_value(stored_msg, ++ F_CRM_HOST_FROM); ++ const char *transition = crm_element_value(msg_data, ++ XML_ATTR_TRANSITION_KEY); ++ ++ crm_info("Notifying %s on %s that %s was%s deleted", ++ from_sys, (from_host? from_host : "local node"), rsc_id, ++ ((rc == pcmk_rc_ok)? "" : " not")); ++ op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0); ++ op->type = lrmd_event_exec_complete; ++ op->user_data = strdup(transition? transition : FAKE_TE_ID); ++ op->params = crm_str_table_new(); ++ g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), ++ strdup(CRM_FEATURE_SET)); ++ controld_rc2event(op, rc); ++ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); ++ lrmd_free_event(op); ++ controld_trigger_delete_refresh(from_sys, rsc_id); ++ } ++ return I_NULL; ++ } ++} ++ + /*! + * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache + * +@@ -913,9 +1001,12 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) + crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); + return I_JOIN_RESULT; + +- } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 +- || strcmp(op, CRM_OP_LRM_FAIL) == 0 +- || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { ++ } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { ++ return handle_lrm_delete(stored_msg); ++ ++ } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) ++ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) ++ || (strcmp(op, CRM_OP_REPROBE) == 0)) { + + crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); + return I_ROUTER; +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index 948bd64..59e0b5a 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -107,6 +107,13 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action) + + if (!router_node) { + router_node = on_node; ++ if (safe_str_eq(task, CRM_OP_LRM_DELETE)) { ++ const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); ++ ++ if (safe_str_eq(mode, XML_TAG_CIB)) { ++ router_node = fsa_our_uname; ++ } ++ } + } + + CRM_CHECK(on_node != NULL && strlen(on_node) != 0, +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 1f25686..2fa53dd 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -216,6 +216,8 @@ pid_t pcmk_locate_sbd(void); + # define ATTRD_OP_SYNC_RESPONSE "sync-response" + # define ATTRD_OP_CLEAR_FAILURE "clear-failure" + ++# define PCMK__XA_MODE "mode" ++ + # define PCMK_ENV_PHYSICAL_HOST "physical_host" + + +-- +1.8.3.1 + diff --git a/SOURCES/018-shutdown-lock.patch b/SOURCES/018-shutdown-lock.patch new file mode 100644 index 0000000..99ad90e --- /dev/null +++ b/SOURCES/018-shutdown-lock.patch @@ -0,0 +1,56 @@ +From 457e231256feb0bdcf206209e03f0875f50d03b3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Jan 2020 16:24:08 -0600 +Subject: [PATCH 14/18] Low: tools: for down nodes, crm_resource --refresh + should clear CIB only + +This provides a mechanism to manually clear shutdown locks. 
+--- + tools/crm_resource_runtime.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index ed5fb03..e89b572 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -473,6 +473,7 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + const char *rsc_type = NULL; + xmlNode *params = NULL; + xmlNode *msg_data = NULL; ++ bool cib_only = false; + resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); + + if (rsc == NULL) { +@@ -504,10 +505,14 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + } + + if (!(node->details->online)) { +- CMD_ERR("Node %s is not online", host_uname); +- return -ENOTCONN; ++ if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { ++ cib_only = true; ++ } else { ++ CMD_ERR("Node %s is not online", host_uname); ++ return -ENOTCONN; ++ } + } +- if (pe__is_guest_or_remote_node(node)) { ++ if (!cib_only && pe__is_guest_or_remote_node(node)) { + node = pe__current_node(node->details->remote_rsc); + if (node == NULL) { + CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", +@@ -533,6 +538,11 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + ++ if (cib_only) { ++ // Indicate that only the CIB needs to be cleaned ++ crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); ++ } ++ + xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); + if (rsc->clone_name) { + crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); +-- +1.8.3.1 + diff --git a/SOURCES/019-shutdown-lock.patch b/SOURCES/019-shutdown-lock.patch new file mode 100644 index 0000000..f94dc58 --- /dev/null +++ b/SOURCES/019-shutdown-lock.patch @@ -0,0 +1,221 @@ +From cf1e90ffe764f3639799206db9444ae32821386b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 10 Jan 2020 18:18:07 -0600 +Subject: [PATCH 15/18] Low: scheduler: clear resource history when appropriate + +Tell the controller to clear resource history from the CIB when a resource has +a shutdown lock that expired or was cancelled because the resource is already +active elsewhere. +--- + include/crm/pengine/internal.h | 4 +++- + include/crm/pengine/pe_types.h | 4 +++- + lib/pacemaker/pcmk_sched_allocate.c | 1 + + lib/pacemaker/pcmk_sched_graph.c | 16 ++++++++++++++-- + lib/pacemaker/pcmk_sched_native.c | 6 ++++++ + lib/pengine/unpack.c | 1 + + lib/pengine/utils.c | 34 ++++++++++++++++++++++++++++++++-- + 7 files changed, 60 insertions(+), 6 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 119624d..bc2c70e 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -435,5 +435,7 @@ void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, + pe_working_set_t *data_set); + + bool pe__resource_is_disabled(pe_resource_t *rsc); ++pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, ++ pe_working_set_t *data_set); + + #endif +diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h +index 123d8ef..572787b 100644 +--- a/include/crm/pengine/pe_types.h ++++ b/include/crm/pengine/pe_types.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -287,6 +287,8 @@ enum pe_action_flags { + pe_action_reschedule = 0x02000, + pe_action_tracking = 0x04000, + pe_action_dedup = 0x08000, //! Internal state tracking when creating graph ++ ++ pe_action_dc = 0x10000, //! Action may run on DC instead of target + }; + /* *INDENT-ON* */ + +diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c +index 884e1bd..195d055 100644 +--- a/lib/pacemaker/pcmk_sched_allocate.c ++++ b/lib/pacemaker/pcmk_sched_allocate.c +@@ -1026,6 +1026,7 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) + pe_rsc_info(rsc, + "Cancelling shutdown lock because %s is already active", + rsc->id); ++ pe__clear_resource_history(rsc, rsc->lock_node, data_set); + rsc->lock_node = NULL; + rsc->lock_time = 0; + } +diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c +index 2861f3d..355ffca 100644 +--- a/lib/pacemaker/pcmk_sched_graph.c ++++ b/lib/pacemaker/pcmk_sched_graph.c +@@ -586,10 +586,11 @@ update_action(pe_action_t *then, pe_working_set_t *data_set) + + /* 'then' is required, so we must abandon 'first' + * (e.g. a required stop cancels any reload). +- * Only used with reload actions as 'first'. 
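Note that two paths above converge on the new pe__clear_resource_history() helper: unpack_shutdown_lock() calls it when a lock has outlived shutdown-lock-limit, and apply_shutdown_lock() calls it when the lock is cancelled because the resource is already active elsewhere. In both cases the scheduler schedules a CIB-only CRM_OP_LRM_DELETE rather than leaving a stale, locked lrm_resource entry in the status section.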
+ */ + set_bit(other->action->flags, pe_action_optional); +- clear_bit(first->rsc->flags, pe_rsc_reload); ++ if (!strcmp(first->task, CRMD_ACTION_RELOAD)) { ++ clear_bit(first->rsc->flags, pe_rsc_reload); ++ } + } + + if (first->rsc && then->rsc && (first->rsc != then->rsc) +@@ -1039,6 +1040,11 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) + } else if (safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) { + action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); + ++ } else if (safe_str_eq(action->task, CRM_OP_LRM_DELETE)) { ++ // CIB-only clean-up for shutdown locks ++ action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); ++ crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); ++ + /* } else if(safe_str_eq(action->task, RSC_PROBED)) { */ + /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ + +@@ -1051,6 +1057,7 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) + + } else { + action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); ++ + #if ENABLE_VERSIONED_ATTRS + rsc_details = pe_rsc_action_details(action); + #endif +@@ -1392,6 +1399,11 @@ should_dump_action(pe_action_t *action) + log_action(LOG_DEBUG, "Unallocated action", action, false); + return false; + ++ } else if (is_set(action->flags, pe_action_dc)) { ++ crm_trace("Action %s (%d) should be dumped: " ++ "can run on DC instead of %s", ++ action->uuid, action->id, action->node->details->uname); ++ + } else if (pe__is_guest_node(action->node) + && !action->node->details->remote_requires_reset) { + crm_trace("Action %s (%d) should be dumped: " +diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c +index 9ebdd35..714a7a0 100644 +--- a/lib/pacemaker/pcmk_sched_native.c ++++ b/lib/pacemaker/pcmk_sched_native.c +@@ -1403,6 +1403,12 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) + pe_order_runnable_left, data_set); + } + ++ // Don't clear resource history if probing on same node ++ custom_action_order(rsc, generate_op_key(rsc->id, CRM_OP_LRM_DELETE, 0), ++ NULL, rsc, generate_op_key(rsc->id, RSC_STATUS, 0), ++ NULL, pe_order_same_node|pe_order_then_cancels_first, ++ data_set); ++ + // Certain checks need allowed nodes + if (check_unfencing || check_utilization || rsc->container) { + allowed_nodes = allowed_nodes_as_list(rsc, data_set); +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 5139e60..87edc83 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2218,6 +2218,7 @@ unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, + > (lock_time + data_set->shutdown_lock))) { + pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", + rsc->id, node->details->uname); ++ pe__clear_resource_history(rsc, node, data_set); + } else { + rsc->lock_node = node; + rsc->lock_time = lock_time; +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 586d92c..b61455d 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -520,6 +520,11 @@ custom_action(resource_t * rsc, char *key, const char *task, + } + action->uuid = strdup(key); + ++ if (safe_str_eq(task, CRM_OP_LRM_DELETE)) { ++ // Resource history deletion for a node can be done on the DC ++ pe_set_action_bit(action, pe_action_dc); ++ } ++ + pe_set_action_bit(action, pe_action_runnable); + if (optional) { + pe_set_action_bit(action, pe_action_optional); +@@ -588,7 +593,8 @@ custom_action(resource_t * rsc, char *key, const char *task, + pe_set_action_bit(action, pe_action_optional); + /* 
action->runnable = FALSE; */ + +- } else if (action->node->details->online == FALSE ++ } else if (is_not_set(action->flags, pe_action_dc) ++ && !(action->node->details->online) + && (!pe__is_guest_node(action->node) + || action->node->details->remote_requires_reset)) { + pe_clear_action_bit(action, pe_action_runnable); +@@ -600,7 +606,8 @@ custom_action(resource_t * rsc, char *key, const char *task, + pe_fence_node(data_set, action->node, "resource actions are unrunnable"); + } + +- } else if (action->node->details->pending) { ++ } else if (is_not_set(action->flags, pe_action_dc) ++ && action->node->details->pending) { + pe_clear_action_bit(action, pe_action_runnable); + do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", + action->uuid, action->node->details->uname); +@@ -714,6 +721,8 @@ unpack_operation_on_fail(action_t * action) + + value = on_fail; + } ++ } else if (safe_str_eq(action->task, CRM_OP_LRM_DELETE)) { ++ value = "ignore"; + } + + return value; +@@ -2595,3 +2604,24 @@ pe__resource_is_disabled(pe_resource_t *rsc) + } + return false; + } ++ ++/*! ++ * \internal ++ * \brief Create an action to clear a resource's history from CIB ++ * ++ * \param[in] rsc Resource to clear ++ * \param[in] node Node to clear history on ++ * ++ * \return New action to clear resource history ++ */ ++pe_action_t * ++pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, ++ pe_working_set_t *data_set) ++{ ++ char *key = NULL; ++ ++ CRM_ASSERT(rsc && node); ++ key = generate_op_key(rsc->id, CRM_OP_LRM_DELETE, 0); ++ return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE, ++ data_set); ++} +-- +1.8.3.1 + diff --git a/SOURCES/020-shutdown-lock.patch b/SOURCES/020-shutdown-lock.patch new file mode 100644 index 0000000..f650b81 --- /dev/null +++ b/SOURCES/020-shutdown-lock.patch @@ -0,0 +1,32 @@ +From 16bcad136dc004b7c7bb9f5044c7ef488c441701 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 21 Nov 2019 15:39:42 -0600 +Subject: [PATCH 16/18] Feature: controller: bump feature set for shutdown-lock + +--- + include/crm/crm.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/crm/crm.h b/include/crm/crm.h +index cbf72d3..d2ffb61 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -51,7 +51,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.2.0" ++# define CRM_FEATURE_SET "3.3.0" + + # define EOS '\0' + # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) +-- +1.8.3.1 + diff --git a/SOURCES/021-shutdown-lock.patch b/SOURCES/021-shutdown-lock.patch new file mode 100644 index 0000000..cdd9dba --- /dev/null +++ b/SOURCES/021-shutdown-lock.patch @@ -0,0 +1,738 @@ +From a9fdae8b3acd9a271d04f98f9c4e230bfa74efd3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 6 Jan 2020 16:19:12 -0600 +Subject: [PATCH 17/18] Test: scheduler: add regression tests for shutdown + locks + +--- + cts/cts-scheduler.in | 4 +- + cts/scheduler/shutdown-lock-expiration.dot | 11 ++ + cts/scheduler/shutdown-lock-expiration.exp | 68 +++++++++ + cts/scheduler/shutdown-lock-expiration.scores | 17 +++ + cts/scheduler/shutdown-lock-expiration.summary | 31 ++++ + cts/scheduler/shutdown-lock-expiration.xml | 187 +++++++++++++++++++++++++ + cts/scheduler/shutdown-lock.dot | 11 ++ + cts/scheduler/shutdown-lock.exp | 64 +++++++++ + cts/scheduler/shutdown-lock.scores | 17 +++ + cts/scheduler/shutdown-lock.summary | 31 ++++ + cts/scheduler/shutdown-lock.xml | 186 ++++++++++++++++++++++++ + 11 files changed, 626 insertions(+), 1 deletion(-) + create mode 100644 cts/scheduler/shutdown-lock-expiration.dot + create mode 100644 cts/scheduler/shutdown-lock-expiration.exp + create mode 100644 cts/scheduler/shutdown-lock-expiration.scores + create mode 100644 cts/scheduler/shutdown-lock-expiration.summary + create mode 100644 cts/scheduler/shutdown-lock-expiration.xml + create mode 100644 cts/scheduler/shutdown-lock.dot + create mode 100644 cts/scheduler/shutdown-lock.exp + create mode 100644 cts/scheduler/shutdown-lock.scores + create mode 100644 cts/scheduler/shutdown-lock.summary + create mode 100644 cts/scheduler/shutdown-lock.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 8fa16fb..f2957ba 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -5,7 +5,7 @@ + # Pacemaker targets compatibility with Python 2.7 and 3.2+ + from __future__ import print_function, unicode_literals, absolute_import, division + +-__copyright__ = "Copyright 2004-2019 the Pacemaker project contributors" ++__copyright__ = "Copyright 2004-2020 the Pacemaker project contributors" + __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + + import io +@@ -956,6 +956,8 @@ TESTS = [ + [ + [ "resource-discovery", "Exercises resource-discovery location constraint option" ], + [ "rsc-discovery-per-node", "Disable resource discovery per node" ], ++ [ "shutdown-lock", "Ensure shutdown lock works properly" ], ++ [ "shutdown-lock-expiration", "Ensure shutdown lock expiration works properly" ], + ], + + # @TODO: If pacemaker implements versioned attributes, uncomment these tests +diff --git a/cts/scheduler/shutdown-lock-expiration.dot b/cts/scheduler/shutdown-lock-expiration.dot +new file mode 100644 +index 0000000..ee99079 +--- /dev/null ++++ b/cts/scheduler/shutdown-lock-expiration.dot +@@ -0,0 +1,11 @@ ++ digraph "g" { ++"Fencing_monitor_120000 node3" [ style=bold color="green" fontcolor="black"] ++"Fencing_start_0 node3" -> "Fencing_monitor_120000 node3" [ style = bold] ++"Fencing_start_0 node3" [ style=bold color="green" fontcolor="black"] ++"Fencing_stop_0 node3" -> "Fencing_start_0 node3" [ style = bold] ++"Fencing_stop_0 node3" 
[ style=bold color="green" fontcolor="black"] ++"rsc2_lrm_delete_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_10000 node4" [ style=bold color="green" fontcolor="black"] ++"rsc2_start_0 node4" -> "rsc2_monitor_10000 node4" [ style = bold] ++"rsc2_start_0 node4" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/shutdown-lock-expiration.exp b/cts/scheduler/shutdown-lock-expiration.exp +new file mode 100644 +index 0000000..465f12b +--- /dev/null ++++ b/cts/scheduler/shutdown-lock-expiration.exp +@@ -0,0 +1,68 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/shutdown-lock-expiration.scores b/cts/scheduler/shutdown-lock-expiration.scores +new file mode 100644 +index 0000000..e5d435d +--- /dev/null ++++ b/cts/scheduler/shutdown-lock-expiration.scores +@@ -0,0 +1,17 @@ ++Allocation scores: ++Using the original execution date of: 2020-01-06 22:11:40Z ++native_color: Fencing allocation score on node1: 0 ++native_color: Fencing allocation score on node2: 0 ++native_color: Fencing allocation score on node3: 0 ++native_color: Fencing allocation score on node4: 0 ++native_color: Fencing allocation score on node5: 0 ++native_color: rsc1 allocation score on node1: INFINITY ++native_color: rsc1 allocation score on node2: -INFINITY ++native_color: rsc1 allocation score on node3: -INFINITY ++native_color: rsc1 allocation score on node4: -INFINITY ++native_color: rsc1 allocation score on node5: -INFINITY ++native_color: rsc2 allocation score on node1: 0 ++native_color: rsc2 allocation score on node2: INFINITY ++native_color: rsc2 allocation score on node3: 0 ++native_color: rsc2 allocation score on node4: 0 ++native_color: rsc2 allocation score on node5: 0 +diff --git a/cts/scheduler/shutdown-lock-expiration.summary b/cts/scheduler/shutdown-lock-expiration.summary +new file mode 100644 +index 0000000..08c93aa +--- /dev/null ++++ b/cts/scheduler/shutdown-lock-expiration.summary +@@ -0,0 +1,31 @@ ++Using the original execution date of: 2020-01-06 22:11:40Z ++ ++Current cluster status: ++Online: [ node3 node4 node5 ] ++OFFLINE: [ node1 node2 ] ++ ++ Fencing (stonith:fence_xvm): Started node3 ++ rsc1 (ocf::pacemaker:Dummy): Stopped node1 (LOCKED) ++ rsc2 (ocf::pacemaker:Dummy): Stopped ++ ++Transition Summary: ++ * Restart Fencing ( node3 ) due to resource definition change ++ * Start rsc2 ( node4 ) ++ ++Executing cluster transition: ++ * Resource action: Fencing stop on node3 ++ * Resource action: Fencing start on node3 ++ * Resource action: Fencing monitor=120000 on node3 ++ * Resource action: rsc2 start on node4 ++ * Cluster action: lrm_delete for rsc2 on node2 ++ * Resource action: rsc2 monitor=10000 on node4 ++Using the original execution date of: 2020-01-06 22:11:40Z ++ ++Revised cluster status: ++Online: [ node3 node4 node5 ] ++OFFLINE: [ node1 node2 ] ++ ++ Fencing (stonith:fence_xvm): Started node3 ++ rsc1 (ocf::pacemaker:Dummy): Stopped node1 (LOCKED) ++ rsc2 (ocf::pacemaker:Dummy): Started node4 ++ +diff --git a/cts/scheduler/shutdown-lock-expiration.xml b/cts/scheduler/shutdown-lock-expiration.xml +new file mode 100644 +index 0000000..26f720e +--- /dev/null ++++ b/cts/scheduler/shutdown-lock-expiration.xml +@@ -0,0 +1,187 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/shutdown-lock.dot b/cts/scheduler/shutdown-lock.dot +new file mode 100644 +index 0000000..0a7d8c3 +--- /dev/null ++++ b/cts/scheduler/shutdown-lock.dot +@@ -0,0 +1,11 @@ ++ digraph "g" { ++"Fencing_monitor_120000 node3" [ style=bold color="green" fontcolor="black"] ++"Fencing_start_0 node3" -> "Fencing_monitor_120000 node3" [ style = bold] ++"Fencing_start_0 node3" [ style=bold color="green" fontcolor="black"] ++"Fencing_stop_0 node1" -> "Fencing_start_0 node3" [ style = bold] ++"Fencing_stop_0 node1" -> "do_shutdown node1" [ style = bold] ++"Fencing_stop_0 node1" [ style=bold color="green" fontcolor="black"] ++"do_shutdown node1" [ style=bold color="green" fontcolor="black"] ++"rsc1_stop_0 node1" -> "do_shutdown node1" [ style = bold] ++"rsc1_stop_0 node1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/shutdown-lock.exp b/cts/scheduler/shutdown-lock.exp +new file mode 100644 +index 0000000..e8bf9d8 +--- /dev/null ++++ b/cts/scheduler/shutdown-lock.exp +@@ -0,0 +1,64 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/shutdown-lock.scores b/cts/scheduler/shutdown-lock.scores +new file mode 100644 +index 0000000..e09ebfb +--- /dev/null ++++ b/cts/scheduler/shutdown-lock.scores +@@ -0,0 +1,17 @@ ++Allocation scores: ++Using the original execution date of: 2020-01-06 21:59:11Z ++native_color: Fencing allocation score on node1: 0 ++native_color: Fencing allocation score on node2: 0 ++native_color: Fencing allocation score on node3: 0 ++native_color: Fencing allocation score on node4: 0 ++native_color: Fencing allocation score on node5: 0 ++native_color: rsc1 allocation score on node1: INFINITY ++native_color: rsc1 allocation score on node2: -INFINITY ++native_color: rsc1 allocation score on node3: -INFINITY ++native_color: rsc1 allocation score on node4: -INFINITY ++native_color: rsc1 allocation score on node5: -INFINITY ++native_color: rsc2 allocation score on node1: -INFINITY ++native_color: rsc2 allocation score on node2: INFINITY ++native_color: rsc2 allocation score on node3: -INFINITY ++native_color: rsc2 allocation score on node4: -INFINITY ++native_color: rsc2 allocation score on node5: -INFINITY +diff --git a/cts/scheduler/shutdown-lock.summary b/cts/scheduler/shutdown-lock.summary +new file mode 100644 +index 0000000..6ed56d1 +--- /dev/null ++++ b/cts/scheduler/shutdown-lock.summary +@@ -0,0 +1,31 @@ ++Using the original execution date of: 2020-01-06 21:59:11Z ++ ++Current cluster status: ++Online: [ node1 node3 node4 node5 ] ++OFFLINE: [ node2 ] ++ ++ Fencing (stonith:fence_xvm): Started node1 ++ rsc1 (ocf::pacemaker:Dummy): Started node1 ++ rsc2 (ocf::pacemaker:Dummy): Stopped node2 (LOCKED) ++ ++Transition Summary: ++ * Shutdown node1 ++ * Move Fencing ( node1 -> node3 ) ++ * Stop rsc1 ( node1 ) due to node availability ++ ++Executing cluster transition: ++ * Resource action: Fencing stop on node1 ++ * Resource action: rsc1 stop on node1 ++ * Cluster 
action: do_shutdown on node1 ++ * Resource action: Fencing start on node3 ++ * Resource action: Fencing monitor=120000 on node3 ++Using the original execution date of: 2020-01-06 21:59:11Z ++ ++Revised cluster status: ++Online: [ node1 node3 node4 node5 ] ++OFFLINE: [ node2 ] ++ ++ Fencing (stonith:fence_xvm): Started node3 ++ rsc1 (ocf::pacemaker:Dummy): Stopped ++ rsc2 (ocf::pacemaker:Dummy): Stopped node2 (LOCKED) ++ +diff --git a/cts/scheduler/shutdown-lock.xml b/cts/scheduler/shutdown-lock.xml +new file mode 100644 +index 0000000..ec6db30 +--- /dev/null ++++ b/cts/scheduler/shutdown-lock.xml +@@ -0,0 +1,186 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + diff --git a/SOURCES/022-shutdown-lock.patch b/SOURCES/022-shutdown-lock.patch new file mode 100644 index 0000000..cfcef11 --- /dev/null +++ b/SOURCES/022-shutdown-lock.patch @@ -0,0 +1,51 @@ +From 5656b7d486569702ea6f3fe695c2fba366c970ac Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 12 Dec 2019 09:26:00 -0600 +Subject: [PATCH 18/18] Doc: Pacemaker Explained: document shutdown lock + options + +--- + doc/Pacemaker_Explained/en-US/Ch-Options.txt | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt +index f864987..35856aa 100644 +--- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt ++++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt +@@ -389,6 +389,33 @@ rules with +date_spec+ are only guaranteed to be checked this often, and it + also serves as a fail-safe for certain classes of scheduler bugs. A value of 0 + disables this polling; positive values are a time interval. + ++| shutdown-lock | false | ++The default of false allows active resources to be recovered elsewhere when ++their node is cleanly shut down, which is what the vast majority of users will ++want. However, some users prefer to make resources highly available only for ++failures, with no recovery for clean shutdowns. If this option is true, ++resources active on a node when it is cleanly shut down are kept "locked" to ++that node (not allowed to run elsewhere) until they start again on that node ++after it rejoins (or for at most shutdown-lock-limit, if set). Stonith ++resources and Pacemaker Remote connections are never locked. Clone and bundle ++instances and the master role of promotable clones are currently never locked, ++though support could be added in a future release. Locks may be manually ++cleared using the `--refresh` option of `crm_resource` (both the resource and ++node must be specified; this works with remote nodes if their connection ++resource's target-role is set to Stopped, but not if Pacemaker Remote is ++stopped on the remote node without disabling the connection resource). 
++indexterm:[shutdown-lock,Cluster Option] ++indexterm:[Cluster,Option,shutdown-lock] ++ ++| shutdown-lock-limit | 0 | ++If shutdown-lock is true, and this is set to a nonzero time duration, locked ++resources will be allowed to start after this much time has passed since the ++node shutdown was initiated, even if the node has not rejoined. (This works ++with remote nodes only if their connection resource's target-role is set to ++Stopped.) ++indexterm:[shutdown-lock-limit,Cluster Option] ++indexterm:[Cluster,Option,shutdown-lock-limit] ++ + | remove-after-stop | FALSE | + indexterm:[remove-after-stop,Cluster Option] + indexterm:[Cluster,Option,remove-after-stop] +-- +1.8.3.1 + diff --git a/SOURCES/023-curses.patch b/SOURCES/023-curses.patch new file mode 100644 index 0000000..c1d9a91 --- /dev/null +++ b/SOURCES/023-curses.patch @@ -0,0 +1,27 @@ +From 426f06cc088d11d6db0c45b434e5ce6b69da78b4 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 2 Jan 2020 15:08:58 -0500 +Subject: [PATCH] Fix: tools: Fix definition of curses_indented_printf. + +The placeholder version that is built if curses is not enabled does not +have a type that matches the header file. Correct that. +--- + tools/crm_mon_curses.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c +index c0dbedb..ecd0584 100644 +--- a/tools/crm_mon_curses.c ++++ b/tools/crm_mon_curses.c +@@ -368,7 +368,7 @@ curses_indented_vprintf(pcmk__output_t *out, const char *format, va_list args) { + + G_GNUC_PRINTF(2, 3) + void +-curses_indented_printf(pcmk__output_t *out, const char *format, va_list args) { ++curses_indented_printf(pcmk__output_t *out, const char *format, ...) { + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/024-crm_mon-cgi.patch b/SOURCES/024-crm_mon-cgi.patch new file mode 100644 index 0000000..c6743eb --- /dev/null +++ b/SOURCES/024-crm_mon-cgi.patch @@ -0,0 +1,33 @@ +From 5b98dd71cef867a115a1b07fca2351ba430baf08 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 10 Jan 2020 09:54:59 -0500 +Subject: [PATCH] Fix: tools: Re-enable CGI output from crm_mon. + +The CGI header was not being written out because "false" was being +passed to the finish function. That was being passed because we didn't +want the HTML to be printed out without the refresh header. The fix is +just to s/false/true, and change the order so the extra header is added +first. 
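For context, the element that pcmk__html_add_header() registers in the hunk below is the standard HTML auto-refresh directive; with a 5-second reconnect interval it would render roughly as (illustrative output):

    <meta http-equiv="refresh" content="5"/>

The fix is therefore purely one of ordering: the header has to be registered before out->finish() writes the document, and finish() must be passed true so the output, including the CGI header, is actually printed.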
+--- + tools/crm_mon.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index c1dcf29..4b28bef 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -1854,10 +1854,9 @@ static void + handle_html_output(crm_exit_t exit_code) { + xmlNodePtr html = NULL; + +- out->finish(out, exit_code, false, (void **) &html); + pcmk__html_add_header(html, "meta", "http-equiv", "refresh", "content", + crm_itoa(options.reconnect_msec/1000), NULL); +- htmlDocDump(out->dest, html->doc); ++ out->finish(out, exit_code, true, (void **) &html); + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/025-clear-attrs.patch b/SOURCES/025-clear-attrs.patch new file mode 100644 index 0000000..842656c --- /dev/null +++ b/SOURCES/025-clear-attrs.patch @@ -0,0 +1,37 @@ +From 01b463bd715d48dde5bf76ca3a2e78e31f0ffaa1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Jan 2020 17:25:57 -0600 +Subject: [PATCH] Fix: controller: clear leaving node's transient attributes + even if there is no DC + +--- + daemons/controld/controld_callbacks.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c +index f7e3db2..21f831a 100644 +--- a/daemons/controld/controld_callbacks.c ++++ b/daemons/controld/controld_callbacks.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -205,7 +205,11 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d + cib_scope_local); + } + +- } else if(AM_I_DC) { ++ } else if (AM_I_DC || (fsa_our_dc == NULL)) { ++ /* This only needs to be done once, so normally the DC should do ++ * it. However if there is no DC, every node must do it, since ++ * there is no other way to ensure some one node does it. ++ */ + if (appeared) { + te_trigger_stonith_history_sync(FALSE); + } else { +-- +1.8.3.1 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec new file mode 100644 index 0000000..dab302c --- /dev/null +++ b/SPECS/pacemaker.spec @@ -0,0 +1,1295 @@ +# Globals and defines to control package behavior (configure these as desired) + +## User and group to use for nonprivileged services +%global uname hacluster +%global gname haclient + +## Where to install Pacemaker documentation +%if 0%{?suse_version} > 0 +%global pcmk_docdir %{_docdir}/%{name}-%{version} +%else +%if 0%{?rhel} > 7 +%global pcmk_docdir %{_docdir}/%{name}-doc +%else +%global pcmk_docdir %{_docdir}/%{name} +%endif +%endif + +## GitHub entity that distributes source (for ease of using a fork) +%global github_owner ClusterLabs + +## Upstream pacemaker version, and its package version (specversion +## can be incremented to build packages reliably considered "newer" +## than previously built packages with the same pcmkversion) +%global pcmkversion 2.0.3 +%global specversion 5 + +## Upstream commit (or git tag, such as "Pacemaker-" plus the +## {pcmkversion} macro for an official release) to use for this package +%global commit 4b1f869f0f64ef0d248b6aa4781d38ecccf83318 +## Since git v2.11, the extent of abbreviation is autoscaled by default +## (used to be constant of 7), so we need to convey it for non-tags, too. 
+%global commit_abbrev 7 + +## Python major version to use (2, 3, or 0 for auto-detect) +%global python_major 0 + +## Nagios source control identifiers +%global nagios_name nagios-agents-metadata +%global nagios_hash 105ab8a + + +# Define globals for convenient use later + +## Workaround to use parentheses in other globals +%global lparen ( +%global rparen ) + +## Short version of git commit +%define shortcommit %(c=%{commit}; case ${c} in + Pacemaker-*%{rparen} echo ${c:10};; + *%{rparen} echo ${c:0:%{commit_abbrev}};; esac) + +## Whether this is a tagged release +%define tag_release %([ %{commit} != Pacemaker-%{shortcommit} ]; echo $?) + +## Whether this is a release candidate (in case of a tagged release) +%define pre_release %([ "%{tag_release}" -eq 0 ] || { + case "%{shortcommit}" in *-rc[[:digit:]]*%{rparen} false;; + esac; }; echo $?) + +## Heuristic used to infer bleeding-edge deployments that are +## less likely to have working versions of the documentation tools +%define bleeding %(test ! -e /etc/yum.repos.d/fedora-rawhide.repo; echo $?) + +## Whether this platform defaults to using systemd as an init system +## (needs to be evaluated prior to BuildRequires being enumerated and +## installed as it's intended to conditionally select some of these, and +## for that there are only few indicators with varying reliability: +## - presence of systemd-defined macros (when building in a full-fledged +## environment, which is not the case with ordinary mock-based builds) +## - systemd-aware rpm as manifested with the presence of particular +## macro (rpm itself will trivially always be present when building) +## - existence of /usr/lib/os-release file, which is something heavily +## propagated by systemd project +## - when not good enough, there's always a possibility to check +## particular distro-specific macros (incl. version comparison) +%define systemd_native (%{?_unitdir:1}%{!?_unitdir:0}%{nil \ + } || %{?__transaction_systemd_inhibit:1}%{!?__transaction_systemd_inhibit:0}%{nil \ + } || %(test -f /usr/lib/os-release; test $? -ne 0; echo $?)) + +# Even though we pass @SYSTEM here, Pacemaker is still an exception to the +# crypto policies because it adds "+ANON-DH" for CIB remote commands and +# "+DHE-PSK:+PSK" for Pacemaker Remote connections. This is currently +# required for the respective functionality. 
+%if 0%{?fedora} > 20 || 0%{?rhel} > 7 +## Base GnuTLS cipher priorities (presumably only the initial, required keyword) +## overridable with "rpmbuild --define 'pcmk_gnutls_priorities PRIORITY-SPEC'" +%define gnutls_priorities %{?pcmk_gnutls_priorities}%{!?pcmk_gnutls_priorities:@SYSTEM} +%endif + +%if !%{defined _rundir} +%if 0%{?fedora} >= 15 || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1200 +%define _rundir /run +%else +%define _rundir /var/run +%endif +%endif + +%if 0%{?fedora} > 22 || 0%{?rhel} > 7 +%global supports_recommends 1 +%endif + +## Different distros name certain packages differently +## (note: corosync libraries also differ, but all provide corosync-devel) +%if 0%{?suse_version} > 0 +%global pkgname_bzip2_devel libbz2-devel +%global pkgname_docbook_xsl docbook-xsl-stylesheets +%global pkgname_gnutls_devel libgnutls-devel +%global pkgname_shadow_utils shadow +%global pkgname_procps procps +%global pkgname_glue_libs libglue +%global pkgname_pcmk_libs lib%{name}3 +%global hacluster_id 90 +%else +%global pkgname_libtool_devel libtool-ltdl-devel +%global pkgname_libtool_devel_arch libtool-ltdl-devel%{?_isa} +%global pkgname_bzip2_devel bzip2-devel +%global pkgname_docbook_xsl docbook-style-xsl +%global pkgname_gnutls_devel gnutls-devel +%global pkgname_shadow_utils shadow-utils +%global pkgname_procps procps-ng +%global pkgname_publican publican +%global pkgname_glue_libs cluster-glue-libs +%global pkgname_pcmk_libs %{name}-libs +%global hacluster_id 189 +%endif + +# Python-related definitions + +## Use Python 3 on certain platforms if major version not specified +%if %{?python_major} == 0 +%if 0%{?fedora} > 26 || 0%{?rhel} > 7 +%global python_major 3 +%endif +%endif + +## Turn off auto-compilation of Python files outside Python specific paths, +## so there's no risk that unexpected "__python" macro gets picked to do the +## RPM-native byte-compiling there (only "{_datadir}/pacemaker/tests" affected) +## -- distro-dependent tricks or automake's fallback to be applied there +%if %{defined _python_bytecompile_extra} +%global _python_bytecompile_extra 0 +%else +### the statement effectively means no RPM-native byte-compiling will occur at +### all, so distro-dependent tricks for Python-specific packages to be applied +%global __os_install_post %(echo '%{__os_install_post}' | { + sed -e 's!/usr/lib[^[:space:]]*/brp-python-bytecompile[[:space:]].*$!!g'; }) +%endif + +## Values that differ by Python major version +%if 0%{?python_major} > 2 +%global python_name python3 +%global python_path %{?__python3}%{!?__python3:/usr/bin/python%{?python3_pkgversion}%{!?python3_pkgversion:3}} +%define python_site %{?python3_sitelib}%{!?python3_sitelib:%( + %{python_path} -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} +%else +%if 0%{?python_major} > 1 +%global python_name python2 +%global python_path %{?__python2}%{!?__python2:/usr/bin/python%{?python2_pkgversion}%{!?python2_pkgversion:2}} +%define python_site %{?python2_sitelib}%{!?python2_sitelib:%( + %{python_path} -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} +%else +%global python_name python +%global python_path %{?__python}%{!?__python:/usr/bin/python%{?python_pkgversion}} +%define python_site %{?python_sitelib}%{!?python_sitelib:%( + python -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} +%endif +%endif + + +# Definitions for backward compatibility with older RPM versions + +## Ensure the license macro behaves consistently 
+
+
+# Definitions for backward compatibility with older RPM versions
+
+## Ensure the license macro behaves consistently (older RPM will otherwise
+## overwrite it once it encounters "License:"). Courtesy Jason Tibbitts:
+## https://pkgs.fedoraproject.org/cgit/rpms/epel-rpm-macros.git/tree/macros.zzz-epel?h=el6&id=e1adcb77
+%if !%{defined _licensedir}
+%define description %{lua:
+    rpm.define("license %doc")
+    print("%description")
+}
+%endif
+
+
+# Define conditionals so that "rpmbuild --with <feature>" and
+# "rpmbuild --without <feature>" can enable and disable specific features
+
+## Add option to enable support for stonith/external fencing agents
+%bcond_with stonithd
+
+## Add option to create binaries suitable for use with profiling tools
+%bcond_with profiling
+
+## Add option to create binaries with coverage analysis
+%bcond_with coverage
+
+## Add option to generate documentation (requires Publican, Asciidoc and Inkscape)
+%bcond_with doc
+
+## Add option to prefix package version with "0."
+## (so later "official" packages will be considered updates)
+%bcond_with pre_release
+
+## Add option to ship Upstart job files
+%bcond_with upstart_job
+
+## Add option to turn off hardening of libraries and daemon executables
+%bcond_without hardening
+
+## Add option to disable links for legacy daemon names
+%bcond_without legacy_links
+
+
+# Keep sane profiling data if requested
+%if %{with profiling}
+
+## Disable -debuginfo package and stripping binaries/libraries
+%define debug_package %{nil}
+
+%endif
+
+
+# Define the release version
+# (do not look at externally enforced pre-release flag for tagged releases
+# as only -rc tags, captured with the second condition, implies that then)
+%if (!%{tag_release} && %{with pre_release}) || 0%{pre_release}
+%if 0%{pre_release}
+%define pcmk_release 0.%{specversion}.%(s=%{shortcommit}; echo ${s: -3})
+%else
+%define pcmk_release 0.%{specversion}.%{shortcommit}.git
+%endif
+%else
+%if 0%{tag_release}
+%define pcmk_release %{specversion}
+%else
+# Never use the short commit in a RHEL release number
+%define pcmk_release %{specversion}
+%endif
+%endif
+
+Name: pacemaker
+Summary: Scalable High-Availability cluster resource manager
+Version: %{pcmkversion}
+Release: %{pcmk_release}%{?dist}
+%if %{defined _unitdir}
+License: GPLv2+ and LGPLv2+
+%else
+# initscript is Revised BSD
+License: GPLv2+ and LGPLv2+ and BSD
+%endif
+Url: http://www.clusterlabs.org
+Group: System Environment/Daemons
+
+# Hint: use "spectool -s 0 pacemaker.spec" (rpmdevtools) to check the final URL:
+# https://github.com/ClusterLabs/pacemaker/archive/e91769e5a39f5cb2f7b097d3c612368f0530535e/pacemaker-e91769e.tar.gz
+Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{shortcommit}.tar.gz
+Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz
+
+# upstream commits
+Patch1: 001-status-deletion.patch
+Patch2: 002-status-deletion.patch
+Patch3: 003-return-codes.patch
+Patch4: 004-unused.patch
+Patch5: 005-shutdown-lock.patch
+Patch6: 006-shutdown-lock.patch
+Patch7: 007-shutdown-lock.patch
+Patch8: 008-shutdown-lock.patch
+Patch9: 009-shutdown-lock.patch
+Patch10: 010-shutdown-lock.patch
+Patch11: 011-shutdown-lock.patch
+Patch12: 012-shutdown-lock.patch
+Patch13: 013-shutdown-lock.patch
+Patch14: 014-shutdown-lock.patch
+Patch15: 015-shutdown-lock.patch
+Patch16: 016-shutdown-lock.patch
+Patch17: 017-shutdown-lock.patch
+Patch18: 018-shutdown-lock.patch
+Patch19: 019-shutdown-lock.patch
+Patch20: 020-shutdown-lock.patch
+Patch21: 021-shutdown-lock.patch
+Patch22: 022-shutdown-lock.patch
+Patch23: 023-curses.patch
+Patch24: 024-crm_mon-cgi.patch
+Patch25: 025-clear-attrs.patch
+
+# 
downstream-only commits +#Patch100: xxx.patch + +Requires: resource-agents +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} +Requires: %{name}-cli = %{version}-%{release} +%if !%{defined _unitdir} +Requires: %{pkgname_procps} +Requires: psmisc +%endif +%{?systemd_requires} + +ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 + +Requires: %{python_path} +BuildRequires: %{python_name}-devel + +# Pacemaker requires a minimum libqb functionality +Requires: libqb >= 0.17.0 +BuildRequires: libqb-devel >= 0.17.0 + +# Basics required for the build (even if usually satisfied through other BRs) +BuildRequires: coreutils findutils grep sed + +# Required for core functionality +BuildRequires: automake autoconf gcc libtool pkgconfig %{?pkgname_libtool_devel} +BuildRequires: pkgconfig(glib-2.0) >= 2.16 +BuildRequires: libxml2-devel libxslt-devel libuuid-devel +BuildRequires: %{pkgname_bzip2_devel} + +# Enables optional functionality +BuildRequires: ncurses-devel %{pkgname_docbook_xsl} +BuildRequires: help2man %{pkgname_gnutls_devel} pam-devel pkgconfig(dbus-1) + +%if %{systemd_native} +BuildRequires: pkgconfig(systemd) +%endif + +# RH patches are created by git, so we need git to apply them +BuildRequires: git + +Requires: corosync >= 2.0.0 +BuildRequires: corosync-devel >= 2.0.0 + +%if %{with stonithd} +BuildRequires: %{pkgname_glue_libs}-devel +%endif + +## (note no avoiding effect when building through non-customized mock) +%if !%{bleeding} +%if %{with doc} +BuildRequires: inkscape asciidoc %{?pkgname_publican} +%endif +%endif + +Provides: pcmk-cluster-manager = %{version}-%{release} +Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} + +# Bundled bits +## Pacemaker uses the crypto/md5-buffer module from gnulib +%if 0%{?fedora} || 0%{?rhel} +Provides: bundled(gnulib) +%endif + +%description +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +It supports more than 16 node clusters with significant capabilities +for managing resources and dependencies. + +It will run scripts at initialization, when machines go up or down, +when related resources fail and can be configured to periodically check +resource health. + +Available rpmbuild rebuild options: + --with(out) : coverage doc stonithd hardening pre_release profiling + +%package cli +License: GPLv2+ and LGPLv2+ +Summary: Command line tools for controlling Pacemaker clusters +Group: System Environment/Daemons +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +%if 0%{?supports_recommends} +#Recommends: pcmk-cluster-manager = %{version}-%{release} +# For crm_report +Requires: tar +Requires: bzip2 +%endif +Requires: perl-TimeDate +Requires: %{pkgname_procps} +Requires: psmisc +Requires(post):coreutils + +%description cli +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{name}-cli package contains command line tools that can be used +to query and control the cluster from machines that may, or may not, +be part of the cluster. + +%package -n %{pkgname_pcmk_libs} +License: GPLv2+ and LGPLv2+ +Summary: Core Pacemaker libraries +Group: System Environment/Daemons +Requires(pre): %{pkgname_shadow_utils} +Requires: %{name}-schemas = %{version}-%{release} +# sbd 1.4.0+ supports the libpe_status API for pe_working_set_t +Conflicts: sbd < 1.4.0 + +%description -n %{pkgname_pcmk_libs} +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. 
+ +The %{pkgname_pcmk_libs} package contains shared libraries needed for cluster +nodes and those just running the CLI tools. + +%package cluster-libs +License: GPLv2+ and LGPLv2+ +Summary: Cluster Libraries used by Pacemaker +Group: System Environment/Daemons +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} + +%description cluster-libs +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{name}-cluster-libs package contains cluster-aware shared +libraries needed for nodes that will form part of the cluster nodes. + +%package remote +%if %{defined _unitdir} +License: GPLv2+ and LGPLv2+ +%else +# initscript is Revised BSD +License: GPLv2+ and LGPLv2+ and BSD +%endif +Summary: Pacemaker remote daemon for non-cluster nodes +Group: System Environment/Daemons +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +Requires: %{name}-cli = %{version}-%{release} +Requires: resource-agents +%if !%{defined _unitdir} +Requires: %{pkgname_procps} +%endif +# -remote can be fully independent of systemd +%{?systemd_ordering}%{!?systemd_ordering:%{?systemd_requires}} +Provides: pcmk-cluster-manager = %{version}-%{release} +Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} + +%description remote +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{name}-remote package contains the Pacemaker Remote daemon +which is capable of extending pacemaker functionality to remote +nodes not running the full corosync/cluster stack. + +%package -n %{pkgname_pcmk_libs}-devel +License: GPLv2+ and LGPLv2+ +Summary: Pacemaker development package +Group: Development/Libraries +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} +Requires: libuuid-devel%{?_isa} %{?pkgname_libtool_devel_arch} +Requires: libxml2-devel%{?_isa} libxslt-devel%{?_isa} +Requires: %{pkgname_bzip2_devel}%{?_isa} glib2-devel%{?_isa} +Requires: libqb-devel%{?_isa} +Requires: corosync-devel >= 2.0.0 + +%description -n %{pkgname_pcmk_libs}-devel +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{pkgname_pcmk_libs}-devel package contains headers and shared libraries +for developing tools for Pacemaker. + +%package cts +License: GPLv2+ and LGPLv2+ +Summary: Test framework for cluster-related technologies like Pacemaker +Group: System Environment/Daemons +Requires: %{python_path} +Requires: %{pkgname_pcmk_libs} = %{version}-%{release} +Requires: %{name}-cli = %{version}-%{release} +Requires: %{pkgname_procps} +Requires: psmisc +BuildArch: noarch + +# systemd python bindings are separate package in some distros +%if %{defined systemd_requires} + +%if 0%{?fedora} > 22 || 0%{?rhel} > 7 +Requires: %{python_name}-systemd +%else +%if 0%{?fedora} > 20 || 0%{?rhel} > 6 +Requires: systemd-python +%endif +%endif + +%endif + +%description cts +Test framework for cluster-related technologies like Pacemaker + +%package doc +License: CC-BY-SA-4.0 +Summary: Documentation for Pacemaker +Group: Documentation +BuildArch: noarch + +%description doc +Documentation for Pacemaker. + +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +%package schemas +License: GPLv2+ +Summary: Schemas and upgrade stylesheets for Pacemaker +BuildArch: noarch + +%description schemas +Schemas and upgrade stylesheets for Pacemaker + +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. 
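+
+# Editor's illustration (not part of the original spec): combined with a
+# third-party nagios plugin, the metadata packaged below lets Pacemaker run a
+# monitor-only resource whose CIB primitive uses class="nagios" and a type
+# matching the plugin name, e.g. type="check_http".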
+ +%package nagios-plugins-metadata +License: GPLv3 +Summary: Pacemaker Nagios Metadata +Group: System Environment/Daemons +# NOTE below are the plugins this metadata uses. +# These packages are not requirements because RHEL does not ship these plugins. +# This metadata provides third-party support for nagios. Users may install the +# plugins via third-party rpm packages, or source. If RHEL ships the plugins in +# the future, we should consider enabling the following required fields. +#Requires: nagios-plugins-http +#Requires: nagios-plugins-ldap +#Requires: nagios-plugins-mysql +#Requires: nagios-plugins-pgsql +#Requires: nagios-plugins-tcp +Requires: pcmk-cluster-manager +BuildArch: noarch + +%description nagios-plugins-metadata +The metadata files required for Pacemaker to execute the nagios plugin +monitor resources. + +%prep +%autosetup -a 1 -n %{name}-%{commit} -S git_am -p 1 + +%build + +# Early versions of autotools (e.g. RHEL <= 5) do not support --docdir +export docdir=%{pcmk_docdir} + +export systemdsystemunitdir=%{?_unitdir}%{!?_unitdir:no} + +# RHEL changes pacemaker's concurrent-fencing default to true +export CPPFLAGS="-DDEFAULT_CONCURRENT_FENCING_TRUE" + +%if %{with hardening} +# prefer distro-provided hardening flags in case they are defined +# through _hardening_{c,ld}flags macros, configure script will +# use its own defaults otherwise; if such hardenings are completely +# undesired, rpmbuild using "--without hardening" +# (or "--define '_without_hardening 1'") +export CFLAGS_HARDENED_EXE="%{?_hardening_cflags}" +export CFLAGS_HARDENED_LIB="%{?_hardening_cflags}" +export LDFLAGS_HARDENED_EXE="%{?_hardening_ldflags}" +export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" +%endif + +./autogen.sh + +%{configure} \ + PYTHON=%{python_path} \ + %{!?with_hardening: --disable-hardening} \ + %{!?with_legacy_links: --disable-legacy-links} \ + %{?with_profiling: --with-profiling} \ + %{?with_coverage: --with-coverage} \ + %{!?with_doc: --with-brand=} \ + %{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \ + --with-initdir=%{_initrddir} \ + --with-runstatedir=%{_rundir} \ + --localstatedir=%{_var} \ + --with-bug-url=https://bugzilla.redhat.com/ \ + --with-nagios \ + --with-nagios-metadata-dir=%{_datadir}/pacemaker/nagios/plugins-metadata/ \ + --with-nagios-plugin-dir=%{_libdir}/nagios/plugins/ \ + --with-version=%{version}-%{release} + +%if 0%{?suse_version} >= 1200 +# Fedora handles rpath removal automagically +sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool +sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool +%endif + +make %{_smp_mflags} V=1 + +%check +{ cts/cts-scheduler --run load-stopped-loop \ + && cts/cts-cli \ + && touch .CHECKED +} 2>&1 | sed 's/[fF]ail/faiil/g' # prevent false positives in rpmlint +[ -f .CHECKED ] && rm -f -- .CHECKED +exit $? 
# TODO remove when rpm<4.14 compatibility irrelevant + +%install +# skip automake-native Python byte-compilation, since RPM-native one (possibly +# distro-confined to Python-specific directories, which is currently the only +# relevant place, anyway) assures proper intrinsic alignment with wider system +# (such as with py_byte_compile macro, which is concurrent Fedora/EL specific) +make install \ + DESTDIR=%{buildroot} V=1 docdir=%{pcmk_docdir} \ + %{?_python_bytecompile_extra:%{?py_byte_compile:am__py_compile=true}} + +mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig +install -m 644 daemons/pacemakerd/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker +install -m 644 tools/crm_mon.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/crm_mon + +%if %{with upstart_job} +mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init +install -m 644 pacemakerd/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf +install -m 644 pacemakerd/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf +install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf +%endif + +mkdir -p %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata +for file in $(find %{nagios_name}-%{nagios_hash}/metadata -type f); do + install -m 644 $file %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata +done + +%if %{defined _unitdir} +mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/lib/rpm-state/%{name} +%endif + +# Don't package static libs +find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f +find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f + +# Do not package these either +rm -f %{buildroot}/%{_sbindir}/fence_legacy +rm -f %{buildroot}/%{_mandir}/man8/fence_legacy.* +find %{buildroot} -name '*o2cb*' -type f -print0 | xargs -0 rm -f + +# For now, don't package the servicelog-related binaries built only for +# ppc64le when certain dependencies are installed. If they get more exercise by +# advanced users, we can reconsider. +rm -f %{buildroot}/%{_sbindir}/notifyServicelogEvent +rm -f %{buildroot}/%{_sbindir}/ipmiservicelogd + +# Don't ship init scripts for systemd based platforms +%if %{defined _unitdir} +rm -f %{buildroot}/%{_initrddir}/pacemaker +rm -f %{buildroot}/%{_initrddir}/pacemaker_remote +%endif + +# Byte-compile Python sources where suitable and the distro procedures known +%if %{defined py_byte_compile} +%{py_byte_compile %{python_path} %{buildroot}%{_datadir}/pacemaker/tests} +%if !%{defined _python_bytecompile_extra} +%{py_byte_compile %{python_path} %{buildroot}%{python_site}/cts} +%endif +%endif + +%if %{with coverage} +GCOV_BASE=%{buildroot}/%{_var}/lib/pacemaker/gcov +mkdir -p $GCOV_BASE +find . 
-name '*.gcno' -type f | while read F ; do + D=`dirname $F` + mkdir -p ${GCOV_BASE}/$D + cp $F ${GCOV_BASE}/$D +done +%endif + +%post +%if %{defined _unitdir} +%systemd_post pacemaker.service +%else +/sbin/chkconfig --add pacemaker || : +%endif + +%preun +%if %{defined _unitdir} +%systemd_preun pacemaker.service +%else +/sbin/service pacemaker stop >/dev/null 2>&1 || : +if [ "$1" -eq 0 ]; then + # Package removal, not upgrade + /sbin/chkconfig --del pacemaker || : +fi +%endif + +%postun +%if %{defined _unitdir} +%systemd_postun_with_restart pacemaker.service +%endif + +%pre remote +%if %{defined _unitdir} +# Stop the service before anything is touched, and remember to restart +# it as one of the last actions (compared to using systemd_postun_with_restart, +# this avoids suicide when sbd is in use) +systemctl --quiet is-active pacemaker_remote +if [ $? -eq 0 ] ; then + mkdir -p %{_localstatedir}/lib/rpm-state/%{name} + touch %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote + systemctl stop pacemaker_remote >/dev/null 2>&1 +else + rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote +fi +%endif + +%post remote +%if %{defined _unitdir} +%systemd_post pacemaker_remote.service +%else +/sbin/chkconfig --add pacemaker_remote || : +%endif + +%preun remote +%if %{defined _unitdir} +%systemd_preun pacemaker_remote.service +%else +/sbin/service pacemaker_remote stop >/dev/null 2>&1 || : +if [ "$1" -eq 0 ]; then + # Package removal, not upgrade + /sbin/chkconfig --del pacemaker_remote || : +fi +%endif + +%postun remote +%if %{defined _unitdir} +# This next line is a no-op, because we stopped the service earlier, but +# we leave it here because it allows us to revert to the standard behavior +# in the future if desired +%systemd_postun_with_restart pacemaker_remote.service +# Explicitly take care of removing the flag-file(s) upon final removal +if [ "$1" -eq 0 ] ; then + rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote +fi +%endif + +%posttrans remote +%if %{defined _unitdir} +if [ -e %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote ] ; then + systemctl start pacemaker_remote >/dev/null 2>&1 + rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote +fi +%endif + +%post cli +%if %{defined _unitdir} +%systemd_post crm_mon.service +%endif +if [ "$1" -eq 2 ]; then + # Package upgrade, not initial install: + # Move any pre-2.0 logs to new location to ensure they get rotated + { mv -fbS.rpmsave %{_var}/log/pacemaker.log* %{_var}/log/pacemaker \ + || mv -f %{_var}/log/pacemaker.log* %{_var}/log/pacemaker + } >/dev/null 2>/dev/null || : +fi + +%preun cli +%if %{defined _unitdir} +%systemd_preun crm_mon.service +%endif + +%postun cli +%if %{defined _unitdir} +%systemd_postun_with_restart crm_mon.service +%endif + +%pre -n %{pkgname_pcmk_libs} +getent group %{gname} >/dev/null || groupadd -r %{gname} -g %{hacluster_id} +getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u %{hacluster_id} -s /sbin/nologin -c "cluster user" %{uname} +exit 0 + +%if %{defined ldconfig_scriptlets} +%ldconfig_scriptlets libs +%ldconfig_scriptlets cluster-libs +%else +%post -n %{pkgname_pcmk_libs} -p /sbin/ldconfig +%postun -n %{pkgname_pcmk_libs} -p /sbin/ldconfig + +%post cluster-libs -p /sbin/ldconfig +%postun cluster-libs -p /sbin/ldconfig +%endif + +%files +########################################################### +%config(noreplace) %{_sysconfdir}/sysconfig/pacemaker +%{_sbindir}/pacemakerd + +%if %{defined _unitdir} 
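+# (Editor's note, illustrative) on systemd-based platforms only the unit file
+# is packaged; the alternate branch below ships the legacy initscript instead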
+%{_unitdir}/pacemaker.service +%else +%{_initrddir}/pacemaker +%endif + +%exclude %{_libexecdir}/pacemaker/cts-log-watcher +%exclude %{_libexecdir}/pacemaker/cts-support +%exclude %{_sbindir}/pacemaker-remoted +%if %{with legacy_links} +%exclude %{_sbindir}/pacemaker_remoted +%endif +%exclude %{_datadir}/pacemaker/nagios +%{_libexecdir}/pacemaker/* + +%{_sbindir}/crm_attribute +%{_sbindir}/crm_master + +%doc %{_mandir}/man7/pacemaker-controld.* +%doc %{_mandir}/man7/pacemaker-schedulerd.* +%doc %{_mandir}/man7/pacemaker-fenced.* +%doc %{_mandir}/man7/ocf_pacemaker_controld.* +%doc %{_mandir}/man7/ocf_pacemaker_remote.* +%doc %{_mandir}/man8/crm_attribute.* +%doc %{_mandir}/man8/crm_master.* +%doc %{_mandir}/man8/pacemakerd.* + +%doc %{_datadir}/pacemaker/alerts + +%license licenses/GPLv2 +%doc COPYING +%doc ChangeLog + +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cib +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/pengine +/usr/lib/ocf/resource.d/pacemaker/controld +/usr/lib/ocf/resource.d/pacemaker/remote + +%if %{with upstart_job} +%config(noreplace) %{_sysconfdir}/init/pacemaker.conf +%config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf +%endif + +%files cli +%dir %attr (750, root, %{gname}) %{_sysconfdir}/pacemaker +%config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker +%config(noreplace) %{_sysconfdir}/sysconfig/crm_mon + +%if %{defined _unitdir} +%{_unitdir}/crm_mon.service +%endif + +%if %{with upstart_job} +%config(noreplace) %{_sysconfdir}/init/crm_mon.conf +%endif + +%{_sbindir}/attrd_updater +%{_sbindir}/cibadmin +%{_sbindir}/crm_diff +%{_sbindir}/crm_error +%{_sbindir}/crm_failcount +%{_sbindir}/crm_mon +%{_sbindir}/crm_node +%{_sbindir}/crm_resource +%{_sbindir}/crm_rule +%{_sbindir}/crm_standby +%{_sbindir}/crm_verify +%{_sbindir}/crmadmin +%{_sbindir}/iso8601 +%{_sbindir}/crm_shadow +%{_sbindir}/crm_simulate +%{_sbindir}/crm_report +%{_sbindir}/crm_ticket +%{_sbindir}/stonith_admin +# "dirname" is owned by -schemas, which is a prerequisite +%{_datadir}/pacemaker/report.collector +%{_datadir}/pacemaker/report.common +# XXX "dirname" is not owned by any prerequisite +%{_datadir}/snmp/mibs/PCMK-MIB.txt + +%exclude /usr/lib/ocf/resource.d/pacemaker/controld +%exclude /usr/lib/ocf/resource.d/pacemaker/remote + +%dir /usr/lib/ocf +%dir /usr/lib/ocf/resource.d +/usr/lib/ocf/resource.d/pacemaker + +%doc %{_mandir}/man7/* +%exclude %{_mandir}/man7/pacemaker-controld.* +%exclude %{_mandir}/man7/pacemaker-schedulerd.* +%exclude %{_mandir}/man7/pacemaker-fenced.* +%exclude %{_mandir}/man7/ocf_pacemaker_controld.* +%exclude %{_mandir}/man7/ocf_pacemaker_remote.* +%doc %{_mandir}/man8/* +%exclude %{_mandir}/man8/crm_attribute.* +%exclude %{_mandir}/man8/crm_master.* +%exclude %{_mandir}/man8/pacemakerd.* +%exclude %{_mandir}/man8/pacemaker-remoted.* + +%license licenses/GPLv2 +%doc COPYING +%doc ChangeLog + +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/blackbox +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cores +%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker +%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles + +%files -n %{pkgname_pcmk_libs} +%{_libdir}/libcib.so.* +%{_libdir}/liblrmd.so.* +%{_libdir}/libcrmservice.so.* +%{_libdir}/libcrmcommon.so.* +%{_libdir}/libpe_status.so.* +%{_libdir}/libpe_rules.so.* +%{_libdir}/libpacemaker.so.* +%{_libdir}/libstonithd.so.* +%license licenses/LGPLv2.1 +%doc COPYING +%doc ChangeLog + +%files 
cluster-libs +%{_libdir}/libcrmcluster.so.* +%license licenses/LGPLv2.1 +%doc COPYING +%doc ChangeLog + +%files remote +%config(noreplace) %{_sysconfdir}/sysconfig/pacemaker +%if %{defined _unitdir} +# state directory is shared between the subpackets +# let rpm take care of removing it once it isn't +# referenced anymore and empty +%ghost %dir %{_localstatedir}/lib/rpm-state/%{name} +%{_unitdir}/pacemaker_remote.service +%else +%{_initrddir}/pacemaker_remote +%endif + +%{_sbindir}/pacemaker-remoted +%if %{with legacy_links} +%{_sbindir}/pacemaker_remoted +%endif +%{_mandir}/man8/pacemaker-remoted.* +%license licenses/GPLv2 +%doc COPYING +%doc ChangeLog + +%files doc +%doc %{pcmk_docdir} +%license licenses/CC-BY-SA-4.0 + +%files cts +%{python_site}/cts +%{_datadir}/pacemaker/tests + +%{_libexecdir}/pacemaker/cts-log-watcher +%{_libexecdir}/pacemaker/cts-support + +%license licenses/GPLv2 +%doc COPYING +%doc ChangeLog + +%files -n %{pkgname_pcmk_libs}-devel +%{_includedir}/pacemaker +%{_libdir}/*.so +%if %{with coverage} +%{_var}/lib/pacemaker/gcov +%endif +%{_libdir}/pkgconfig/*.pc +%license licenses/LGPLv2.1 +%doc COPYING +%doc ChangeLog + +%files schemas +%license licenses/GPLv2 +%dir %{_datadir}/pacemaker +%{_datadir}/pacemaker/*.rng +%{_datadir}/pacemaker/*.xsl +%{_datadir}/pacemaker/api +%{_datadir}/pkgconfig/pacemaker-schemas.pc + +%files nagios-plugins-metadata +%dir %{_datadir}/pacemaker/nagios/plugins-metadata +%attr(0644,root,root) %{_datadir}/pacemaker/nagios/plugins-metadata/* +%license %{nagios_name}-%{nagios_hash}/COPYING + +%changelog +* Mon Jan 27 2020 Ken Gaillot - 2.0.3-5 +- Clear leaving node's attributes if there is no DC +- Resolves: rhbz1791841 + +* Thu Jan 16 2020 Ken Gaillot - 2.0.3-4 +- Implement shutdown-lock feature +- Resolves: rhbz1712584 + +* Wed Nov 27 2019 Ken Gaillot - 2.0.3-3 +- Rebase on Pacemaker-2.0.3 final release +- Resolves: rhbz1752538 + +* Wed Nov 13 2019 Ken Gaillot - 2.0.3-2 +- Rebase on Pacemaker-2.0.3-rc3 +- Resolves: rhbz1752538 + +* Thu Oct 31 2019 Ken Gaillot - 2.0.3-1 +- Rebase on Pacemaker-2.0.3-rc2 +- Parse crm_mon --fence-history option correctly +- Put timeout on controller waiting for scheduler response +- Offer Pacemaker Remote option for bind address +- Calculate cluster recheck interval dynamically +- Clarify crm_resource help text +- Reduce system calls after forking a child process +- Resolves: rhbz1699978 +- Resolves: rhbz1725236 +- Resolves: rhbz1743377 +- Resolves: rhbz1747553 +- Resolves: rhbz1748805 +- Resolves: rhbz1752538 +- Resolves: rhbz1762025 + +* Mon Aug 26 2019 Ken Gaillot - 2.0.2-3 +- Make pacemaker-cli require tar and bzip2 +- Resolves: rhbz#1741580 + +* Fri Jun 21 2019 Klaus Wenninger - 2.0.2-2 +- Synchronize fence-history on fenced-restart +- Cleanup leftover pending-fence-actions when fenced is restarted +- Improve fencing of remote-nodes +- Resolves: rhbz#1708380 +- Resolves: rhbz#1708378 +- Resolves: rhbz#1721198 +- Resolves: rhbz#1695737 + +* Thu Jun 6 2019 Ken Gaillot - 2.0.2-1 +- Add stonith_admin option to display XML output +- Add new crm_rule tool to check date/time rules +- List any constraints cleared by crm_resource --clear +- crm_resource --validate can now get resource parameters from command line +- Rebase on upstream version 2.0.2 +- Default concurrent-fencing to true +- Resolves: rhbz#1555939 +- Resolves: rhbz#1572116 +- Resolves: rhbz#1631752 +- Resolves: rhbz#1637020 +- Resolves: rhbz#1695737 +- Resolves: rhbz#1715426 + +* Wed May 15 2019 Ken Gaillot - 2.0.1-5 +- Add gating tests for CI +- 
Restore correct behavior when live migration is interrupted +- Improve clients' authentication of IPC servers (CVE-2018-16877) +- Fix use-after-free with potential information disclosure (CVE-2019-3885) +- Improve pacemakerd authentication of running subdaemons (CVE-2018-16878) +- Resolves: rhbz#1682116 +- Resolves: rhbz#1684306 +- Resolves: rhbz#1694558 +- Resolves: rhbz#1694560 +- Resolves: rhbz#1694908 + +* Tue Jan 29 2019 Ken Gaillot - 2.0.1-4 +- Remove duplicate fence history state listing in crm_mon XML output +- Resolves: rhbz#1667191 + +* Thu Jan 10 2019 Ken Gaillot - 2.0.1-3 +- Fix bundle recovery regression in 2.0.1-2 +- Resolves: rhbz#1660592 + +* Fri Dec 14 2018 Ken Gaillot - 2.0.1-2 +- Move pacemaker-doc installed files to /usr/share/doc/pacemaker-doc + to avoid conflict with RHEL 8 location of pacemaker subpackage docs +- Resolves: rhbz#1543494 + +* Thu Dec 13 2018 Ken Gaillot - 2.0.1-1 +- Rebase on upstream commit 0eb799156489376e13fb79dca47ea9160e9d4595 (Pacemaker-2.0.1-rc1) +- Follow upstream change of splitting XML schemas into separate package +- Resolves: rhbz#1543494 + +* Fri Nov 16 2018 Ken Gaillot - 2.0.0-11 +- Rebase on upstream commit efbf81b65931423b34c91cde7204a2d0a71e77e6 +- Resolves: rhbz#1543494 + +* Fri Sep 28 2018 Ken Gaillot - 2.0.0-10 +- Rebase on upstream commit b67d8d0de9794e59719608d9b156b4a3c6556344 +- Update spec for Python macro changes +- Resolves: rhbz#1543494 +- Resolves: rhbz#1633612 + +* Mon Sep 17 2018 Ken Gaillot - 2.0.0-9 +- Rebase on upstream commit c4330b46bf1c3dcd3e367b436efb3bbf82ef51cd +- Support podman as bundle container launcher +- Ignore fence history in crm_mon when using CIB_file +- Resolves: rhbz#1543494 +- Resolves: rhbz#1607898 +- Resolves: rhbz#1625231 + +* Thu Aug 30 2018 Ken Gaillot - 2.0.0-8 +- Rebase on upstream commit dd6fd26f77945b9bb100d5a3134f149b27601552 +- Fixes (unreleased) API regression +- Resolves: rhbz#1543494 +- Resolves: rhbz#1622969 + +* Mon Aug 13 2018 Ken Gaillot - 2.0.0-7 +- Include upstream master branch commits through 975347d4 +- Resolves: rhbz#1543494 +- Resolves: rhbz#1602650 +- Resolves: rhbz#1608369 + +* Mon Jul 30 2018 Florian Weimer - 2.0.0-6 +- Rebuild with fixed binutils + +* Mon Jul 9 2018 Ken Gaillot - 2.0.0-5 +- Rebase to upstream version 2.0.0 final +- Resolves: rhbz#1543494 + +* Wed Jun 6 2018 Ken Gaillot - 2.0.0-4 +- Rebase to upstream version 2.0.0-rc5 +- Resolves: rhbz#1543494 + +* Mon Apr 30 2018 Ken Gaillot - 2.0.0-2 +- Rebase to upstream version 2.0.0-rc3 +- Resolves: rhbz#1543494 + +* Tue Apr 17 2018 Ken Gaillot - 2.0.0-1 +- Rebase to upstream version 2.0.0-rc2 with later fixes +- Resolves: rhbz#1543494 + +* Tue Apr 17 2018 Josh Boyer - 1.1.17-3 +- Stop hard requiring nagios-plugins + +* Wed Oct 18 2017 Jan Pokorný - 1.1.17-2 +- Rebuilt to fix libqb vs. 
ld.bfd/binutils-2.29 incompatibility making + some CLI executables unusable under some circumstances (rhbz#1503843) + +* Thu Aug 03 2017 Fedora Release Engineering - 1.1.17-1.2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + +* Thu Jul 27 2017 Fedora Release Engineering - 1.1.17-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + +* Fri Jul 07 2017 Jan Pokorný - 1.1.17-1 +- Update for new upstream tarball: Pacemaker-1.1.17, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17 + +* Thu Jun 22 2017 Jan Pokorný - 1.1.17-0.1.rc4 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc4, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc4 +- Add an imposed lower bound for glib2 BuildRequires + +* Thu Jun 01 2017 Jan Pokorný - 1.1.17-0.1.rc3 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc3, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc3 + +* Wed May 24 2017 Jan Pokorný - 1.1.17-0.1.rc2 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc2, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc2 + +* Tue May 09 2017 Jan Pokorný - 1.1.17-0.1.rc1 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc1, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc1 + +* Mon Feb 06 2017 Jan Pokorný - 1.1.16-2.a39ea6491.git +- Update for (slightly stabilized) snapshot beyond Pacemaker-1.1.16 + (commit a39ea6491), including: + . prevent FTBFS with new GCC 7 (a7476dd96) +- Adapt spec file more akin to upstream version including: + . better pre-release vs. tags logic (4581d4366) + +* Fri Dec 02 2016 Jan Pokorný - 1.1.16-1 +- Update for new upstream tarball: Pacemaker-1.1.16, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.16 +- Adapt spec file more akin to upstream version including: + . clarify licensing, especially for -doc (f01f734) + . fix pacemaker-remote upgrade (779e0e3) + . require python >= 2.6 (31ef7f0) + . older libqb is sufficient (based on 30fe1ce) + . remove openssl-devel and libselinux-devel as BRs (2e05c17) + . make systemd BR pkgconfig-driven (6285924) + . 
defines instead of some globals + error suppression (625d427) +- Rectify -nagios-plugins-metadata declared license and install + also respective license text + +* Thu Nov 03 2016 Jan Pokorný - 1.1.15-3 +- Apply fix for CVE-2016-7035 (improper IPC guarding) + +* Tue Jul 19 2016 Fedora Release Engineering - 1.1.15-2.1 +- https://fedoraproject.org/wiki/Changes/Automatic_Provides_for_Python_RPM_Packages + +* Thu Jul 07 2016 Jan Pokorný - 1.1.15-2 +- Stop building with -fstack-protector-all using the upstream patches + overhauling toolchain hardening (Fedora natively uses + -fstack-protector-strong so this effectively relaxed stack protection + is the only effect as hardened flags are already used by default: + https://fedoraproject.org/wiki/Changes/Harden_All_Packages) + +* Wed Jun 22 2016 Jan Pokorný - 1.1.15-1 +- Update for new upstream tarball: Pacemaker-1.1.15, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15 +- Adapt spec file more akin to upstream version: + . move xml schema files + PCMK-MIB.txt (81ef956), logrotate configuration + file (ce576cf; drop it from -remote package as well), attrd_updater + (aff80ae), the normal resource agents (1fc7287), and common directories + under /var/lib/pacemaker (3492794) from main package under -cli + . simplify docdir build parameter passing and drop as of now + redundant chmod invocations (e91769e) + +* Fri May 27 2016 Jan Pokorný - 1.1.15-0.1.rc3 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.15-rc3, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15-rc3 +- Drop fence_pcmk (incl. man page) from the package (no use where no CMAN) +- Drop license macro emulation for cases when not supported natively + (several recent Fedora releases do not need that) + +* Mon May 16 2016 Jan Pokorný - 1.1.15-0.1.rc2 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.15-rc2, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15-rc2 + +* Tue Apr 26 2016 Jan Pokorný - 1.1.15-0.1.rc1 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.15-rc1, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15-rc1 +- Adapt spec file more akin to upstream version (also to reflect recent + changes like ability to built explicitly without Publican-based docs) + +* Thu Mar 31 2016 Jan Pokorný - 1.1.14-2.5a6cdd1.git +- Update for currently stabilized snapshot beyond Pacemaker-1.1.14 + (commit 5a6cdd1), but restore old-style notifications to the state at + Pacemaker-1.1.14 point release (disabled) +- Definitely get rid of Corosync v1 (Flatiron) hypothetical support +- Remove some of the spec file cruft, not required for years + (BuildRoot, AutoReqProv, "clean" scriptlet, etc.) 
and adapt the file + per https://github.com/ClusterLabs/pacemaker/pull/965 + +* Thu Feb 04 2016 Fedora Release Engineering - 1.1.14-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild + +* Mon Jan 18 2016 Jan Pokorný - 1.1.14-1 +- Update for new upstream tarball: Pacemaker-1.1.14, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.14 +- Disable Fedora crypto policies conformance patch for now (rhbz#1179335) +- Better align specfile with the upstream version (also fix issue with + crm_mon sysconfig file not being installed) +- Further specfile modifications: + - drop unused gcc-c++ and repeatedly mentioned pkgconfig packages + from BuildRequires + - refer to python_sitearch macro first, if defined + - tolerate license macro not being defined (e.g., for EPEL rebuilds) +- Prevent console mode not available in crm_mon due to curses library test + fragility of configure script in hardened build environment (rhbz#1297985) + +* Tue Oct 20 2015 Jan Pokorný - 1.1.13-4 +- Adapt to follow Fedora crypto policies (rhbz#1179335) + +* Wed Oct 14 2015 Jan Pokorný - 1.1.13-3 +- Update to Pacemaker-1.1.13 post-release + patches (sync) +- Add nagios-plugins-metadata subpackage enabling support of selected + Nagios plugins as resources recognized by Pacemaker +- Several specfile improvements: drop irrelevant stuff, rehash the + included/excluded files + dependencies, add check scriptlet, + reflect current packaging practice, do minor cleanups + (mostly adopted from another spec) + +* Thu Aug 20 2015 Andrew Beekhof - 1.1.13-2 +- Update for new upstream tarball: Pacemaker-1.1.13 +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details + +* Thu Jun 18 2015 Fedora Release Engineering - 1.1.12-2.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + +* Wed Nov 05 2014 Andrew Beekhof - 1.1.12-2 +- Address incorrect use of the dbus API for interacting with systemd + +* Tue Oct 28 2014 Andrew Beekhof - 1.1.12-1 +- Update for new upstream tarball: Pacemaker-1.1.12+ (a9c8177) +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details + +* Sun Aug 17 2014 Fedora Release Engineering - 1.1.11-1.2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild + +* Fri Jun 06 2014 Fedora Release Engineering - 1.1.11-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + +* Tue Feb 18 2014 Andrew Beekhof - 1.1.11-1 +- Update for new upstream tarball: Pacemaker-1.1.11 (9d39a6b) +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details + +* Thu Jun 20 2013 Andrew Beekhof - 1.1.9-3 +- Update to upstream 7d8acec +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details + + + Feature: Turn off auto-respawning of systemd services when the cluster starts them + + Fix: crmd: Ensure operations for cleaned up resources don't block recovery + + Fix: logging: If SIGTRAP is sent before tracing is turned on, turn it on instead of crashing + +* Mon Jun 17 2013 Andrew Beekhof - 1.1.9-2 +- Update for new upstream tarball: 781a388 +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details + +* Wed May 12 2010 Andrew Beekhof - 1.1.2-1 +- Update the tarball from the upstream 1.1.2 release +- See included ChangeLog file or 
https://raw.github.com/ClusterLabs/pacemaker/master/ChangeLog for full details + +* Tue Jul 14 2009 Andrew Beekhof - 1.0.4-1 +- Initial checkin