diff --git a/SOURCES/018-node-names.patch b/SOURCES/018-node-names.patch new file mode 100644 index 0000000..f8b8dfb --- /dev/null +++ b/SOURCES/018-node-names.patch @@ -0,0 +1,65 @@ +From 7618c29761368262fd8d633992816b52755ec028 Mon Sep 17 00:00:00 2001 +From: Hideo Yamauchi +Date: Thu, 26 Apr 2018 12:51:06 +0900 +Subject: [PATCH] Mid: lib: Changed to lowercase comparison. + +--- + lib/cib/cib_attrs.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c +index 8287a44..060d830 100644 +--- a/lib/cib/cib_attrs.c ++++ b/lib/cib/cib_attrs.c +@@ -471,17 +471,19 @@ get_uuid_from_result(xmlNode *result, char **uuid, int *is_remote) + * - guest node in resources section + * - orphaned remote node or bundle guest node in status section + */ ++#define XPATH_UPPER_TRANS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ++#define XPATH_LOWER_TRANS "abcdefghijklmnopqrstuvwxyz" + #define XPATH_NODE \ + "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \ +- "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UNAME "='%s']" \ ++ "/" XML_CIB_TAG_NODE "[translate(@" XML_ATTR_UNAME ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ + "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ + "/" XML_CIB_TAG_RESOURCE \ +- "[@class='ocf'][@provider='pacemaker'][@type='remote'][@id='%s']" \ ++ "[@class='ocf'][@provider='pacemaker'][@type='remote'][translate(@id,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ + "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ + "/" XML_CIB_TAG_RESOURCE "/" XML_TAG_META_SETS "/" XML_CIB_TAG_NVPAIR \ +- "[@name='" XML_RSC_ATTR_REMOTE_NODE "'][@value='%s']" \ ++ "[@name='" XML_RSC_ATTR_REMOTE_NODE "'][translate(@value,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ + "|/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS "/" XML_CIB_TAG_STATE \ +- "[@" XML_NODE_IS_REMOTE "='true'][@" XML_ATTR_UUID "='%s']" ++ "[@" XML_NODE_IS_REMOTE 
"='true'][translate(@" XML_ATTR_UUID ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" + + int + query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node) +@@ -489,6 +491,7 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_ + int rc = pcmk_ok; + char *xpath_string; + xmlNode *xml_search = NULL; ++ char *host_lowercase = g_ascii_strdown(uname, -1); + + CRM_ASSERT(uname != NULL); + +@@ -499,7 +502,7 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_ + *is_remote_node = FALSE; + } + +- xpath_string = crm_strdup_printf(XPATH_NODE, uname, uname, uname, uname); ++ xpath_string = crm_strdup_printf(XPATH_NODE, host_lowercase, host_lowercase, host_lowercase, host_lowercase); + if (cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, + &xml_search, cib_sync_call|cib_scope_local|cib_xpath, + NULL) == pcmk_ok) { +@@ -509,6 +512,7 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_ + } + free(xpath_string); + free_xml(xml_search); ++ free(host_lowercase); + + if (rc != pcmk_ok) { + crm_debug("Could not map node name '%s' to a UUID: %s", +-- +1.8.3.1 + diff --git a/SOURCES/019-requires-quorum.patch b/SOURCES/019-requires-quorum.patch new file mode 100644 index 0000000..c190b4e --- /dev/null +++ b/SOURCES/019-requires-quorum.patch @@ -0,0 +1,660 @@ +From 85a3a174e1fc4cd4b055eb22827c1c3d0b288a85 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 22 May 2018 11:00:22 -0500 +Subject: [PATCH 1/3] Low: libpe_status: handle pending migrations correctly + +This is mainly a refactor of unpack_rsc_migration() for readability. + +The one significant change is that previously, a migrate_from operation that +was *recorded* as pending (record-pending=true) was treated differently from an +unrecorded pending migrate_from (record-pending=false). 
+--- + include/crm/pengine/status.h | 3 + + lib/pengine/unpack.c | 162 ++++++++++++++++++++++++------------------- + 2 files changed, 94 insertions(+), 71 deletions(-) + +diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h +index fca7f12..a8c90e2 100644 +--- a/include/crm/pengine/status.h ++++ b/include/crm/pengine/status.h +@@ -30,6 +30,9 @@ typedef struct pe_action_s pe_action_t; + typedef struct resource_s resource_t; + typedef struct ticket_s ticket_t; + ++// forward-compatible with Pacemaker 2.0.0 ++typedef struct resource_s pe_resource_t; ++ + typedef enum no_quorum_policy_e { + no_quorum_freeze, + no_quorum_stop, +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 1b8ca22..73bbe27 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2414,94 +2414,114 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * + return get_xpath_object(xpath, data_set->input, LOG_DEBUG); + } + ++static bool ++stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, ++ pe_working_set_t *data_set) ++{ ++ xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, ++ NULL, data_set); ++ ++ if (stop_op) { ++ int stop_id = 0; ++ int task_id = 0; ++ ++ crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); ++ crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); ++ if (stop_id > task_id) { ++ return TRUE; ++ } ++ } ++ return FALSE; ++} ++ + static void + unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) + { +- +- /* +- * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) +- * +- * So if a migrate_to is followed by a stop, then we don't need to care what +- * happened on the target node ++ /* A successful migration sequence is: ++ * migrate_to on source node ++ * migrate_from on target node ++ * stop on source node + * +- * Without the stop, we need to look for a successful 
migrate_from. +- * This would also imply we're no longer running on the source ++ * If a migrate_to is followed by a stop, the entire migration (successful ++ * or failed) is complete, and we don't care what happened on the target. + * +- * Without the stop, and without a migrate_from op we make sure the resource +- * gets stopped on both source and target (assuming the target is up) ++ * If no migrate_from has happened, the migration is considered to be ++ * "partial". If the migrate_from failed, make sure the resource gets ++ * stopped on both source and target (if up). + * ++ * If the migrate_to and migrate_from both succeeded (which also implies the ++ * resource is no longer running on the source), but there is no stop, the ++ * migration is considered to be "dangling". + */ +- int stop_id = 0; +- int task_id = 0; +- xmlNode *stop_op = +- find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); +- +- if (stop_op) { +- crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); ++ int from_rc = 0; ++ int from_status = 0; ++ const char *migrate_source = NULL; ++ const char *migrate_target = NULL; ++ pe_node_t *target = NULL; ++ pe_node_t *source = NULL; ++ xmlNode *migrate_from = NULL; ++ ++ if (stop_happened_after(rsc, node, xml_op, data_set)) { ++ return; + } + +- crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); ++ // Clones are not allowed to migrate, so role can't be master ++ rsc->role = RSC_ROLE_STARTED; + +- if (stop_op == NULL || stop_id < task_id) { +- int from_rc = 0, from_status = 0; +- const char *migrate_source = +- crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *migrate_target = +- crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); ++ migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); + +- node_t *target = pe_find_node(data_set->nodes, migrate_target); +- node_t *source = 
pe_find_node(data_set->nodes, migrate_source); +- xmlNode *migrate_from = +- find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, +- data_set); ++ target = pe_find_node(data_set->nodes, migrate_target); ++ source = pe_find_node(data_set->nodes, migrate_source); + +- rsc->role = RSC_ROLE_STARTED; /* can be master? */ +- if (migrate_from) { +- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); +- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); +- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", +- ID(migrate_from), migrate_target, from_status, from_rc); +- } +- +- if (migrate_from && from_rc == PCMK_OCF_OK +- && from_status == PCMK_LRM_OP_DONE) { +- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), +- migrate_source); ++ // Check whether there was a migrate_from action ++ migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, ++ migrate_source, data_set); ++ if (migrate_from) { ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); ++ pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ++ ID(migrate_from), migrate_target, from_status, from_rc); ++ } + +- /* all good +- * just need to arrange for the stop action to get sent +- * but _without_ affecting the target somehow +- */ +- rsc->role = RSC_ROLE_STOPPED; +- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); ++ if (migrate_from && from_rc == PCMK_OCF_OK ++ && from_status == PCMK_LRM_OP_DONE) { ++ /* The migrate_to and migrate_from both succeeded, so mark the migration ++ * as "dangling". This will be used to schedule a stop action on the ++ * source without affecting the target. 
++ */ ++ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), ++ migrate_source); ++ rsc->role = RSC_ROLE_STOPPED; ++ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); + +- } else if (migrate_from) { /* Failed */ +- if (target && target->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, +- target->details->online); +- native_add_running(rsc, target, data_set); +- } ++ } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed ++ if (target && target->details->online) { ++ pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, ++ target->details->online); ++ native_add_running(rsc, target, data_set); ++ } + +- } else { /* Pending or complete but erased */ +- if (target && target->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, +- target->details->online); ++ } else { // Pending, or complete but erased ++ if (target && target->details->online) { ++ pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, ++ target->details->online); + +- native_add_running(rsc, target, data_set); +- if (source && source->details->online) { +- /* If we make it here we have a partial migration. The migrate_to +- * has completed but the migrate_from on the target has not. Hold on +- * to the target and source on the resource. 
Later on if we detect that +- * the resource is still going to run on that target, we may continue +- * the migration */ +- rsc->partial_migration_target = target; +- rsc->partial_migration_source = source; +- } +- } else { +- /* Consider it failed here - forces a restart, prevents migration */ +- set_bit(rsc->flags, pe_rsc_failed); +- clear_bit(rsc->flags, pe_rsc_allow_migrate); ++ native_add_running(rsc, target, data_set); ++ if (source && source->details->online) { ++ /* This is a partial migration: the migrate_to completed ++ * successfully on the source, but the migrate_from has not ++ * completed. Remember the source and target; if the newly ++ * chosen target remains the same when we schedule actions ++ * later, we may continue with the migration. ++ */ ++ rsc->partial_migration_target = target; ++ rsc->partial_migration_source = source; + } ++ } else { ++ /* Consider it failed here - forces a restart, prevents migration */ ++ set_bit(rsc->flags, pe_rsc_failed); ++ clear_bit(rsc->flags, pe_rsc_allow_migrate); + } + } + } +-- +1.8.3.1 + + +From 37913a1dec2bda66476bddb5559817d23058be59 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 21 May 2018 12:43:09 -0500 +Subject: [PATCH 2/3] Refactor: libpe_status: new functions for finding + resource's active nodes + +Existing code often grabs rsc->running_on->data (i.e. the first node in the +list) as the resource's current node, and often uses +g_list_length(rsc->running_on). + +However, if the resource is in the middle of a partial migration, the migration +source should be preferred as the current node. Also, if a resource has +"requires" set to "nothing" or "quorum", a clean, online node should be +preferred as the current node, and a caller should ignore unclean and offline +nodes when counting in certain cases. + +These functions will allow those issues to be addressed in later commits. 
+--- + include/crm/pengine/internal.h | 34 +++++++----- + lib/pengine/complex.c | 121 ++++++++++++++++++++++++++++++++++++----- + 2 files changed, 127 insertions(+), 28 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index e9d7582..fe8f6a1 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -1,20 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2018 Andrew Beekhof + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. +- * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ ++ + #ifndef PE_INTERNAL__H + # define PE_INTERNAL__H + # include +@@ -125,6 +115,22 @@ int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, + uint32_t flags, xmlNode *xml_op, + pe_working_set_t *data_set); + ++ ++/* Functions for finding/counting a resource's active nodes */ ++ ++pe_node_t *pe__find_active_on(const resource_t *rsc, ++ unsigned int *count_all, ++ unsigned int *count_clean); ++pe_node_t *pe__find_active_requires(const resource_t *rsc, ++ unsigned int *count); ++ ++static inline pe_node_t * ++pe__current_node(const resource_t *rsc) ++{ ++ return pe__find_active_on(rsc, NULL, NULL); ++} ++ ++ + /* Binary like operators for lists of nodes */ + extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); + extern GListPtr node_list_dup(GListPtr list, gboolean reset, gboolean filter); +diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c +index 86f290c..cdd409a 100644 +--- a/lib/pengine/complex.c ++++ b/lib/pengine/complex.c +@@ -1,19 +1,8 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2018 Andrew Beekhof + * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. 
+- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -981,3 +970,107 @@ common_free(resource_t * rsc) + free(rsc->pending_task); + free(rsc); + } ++ ++/*! ++ * \internal ++ * \brief Find a node (and optionally count all) where resource is active ++ * ++ * \param[in] rsc Resource to check ++ * \param[out] count_all If not NULL, will be set to count of active nodes ++ * \param[out] count_clean If not NULL, will be set to count of clean nodes ++ * ++ * \return An active node (or NULL if resource is not active anywhere) ++ * ++ * \note The order of preference is: an active node that is the resource's ++ * partial migration source; if the resource's "requires" is "quorum" or ++ * "nothing", the first active node in the list that is clean and online; ++ * the first active node in the list. 
++ */ ++pe_node_t * ++pe__find_active_on(const resource_t *rsc, unsigned int *count_all, ++ unsigned int *count_clean) ++{ ++ pe_node_t *active = NULL; ++ pe_node_t *node = NULL; ++ bool keep_looking = FALSE; ++ bool is_happy = FALSE; ++ ++ if (count_all) { ++ *count_all = 0; ++ } ++ if (count_clean) { ++ *count_clean = 0; ++ } ++ if (rsc == NULL) { ++ return NULL; ++ } ++ ++ for (GList *node_iter = rsc->running_on; node_iter != NULL; ++ node_iter = node_iter->next) { ++ ++ node = node_iter->data; ++ keep_looking = FALSE; ++ ++ is_happy = node->details->online && !node->details->unclean; ++ ++ if (count_all) { ++ ++*count_all; ++ } ++ if (count_clean && is_happy) { ++ ++*count_clean; ++ } ++ if (count_all || count_clean) { ++ // If we're counting, we need to go through entire list ++ keep_looking = TRUE; ++ } ++ ++ if (rsc->partial_migration_source != NULL) { ++ if (node->details == rsc->partial_migration_source->details) { ++ // This is the migration source ++ active = node; ++ } else { ++ keep_looking = TRUE; ++ } ++ } else if (is_not_set(rsc->flags, pe_rsc_needs_fencing)) { ++ if (is_happy && (!active || !active->details->online ++ || active->details->unclean)) { ++ // This is the first clean node ++ active = node; ++ } else { ++ keep_looking = TRUE; ++ } ++ } ++ if (active == NULL) { ++ // This is first node in list ++ active = node; ++ } ++ ++ if (keep_looking == FALSE) { ++ // Don't waste time iterating if we don't have to ++ break; ++ } ++ } ++ return active; ++} ++ ++/*! 
++ * \internal ++ * \brief Find and count active nodes according to "requires" ++ * ++ * \param[in] rsc Resource to check ++ * \param[out] count If not NULL, will be set to count of active nodes ++ * ++ * \return An active node (or NULL if resource is not active anywhere) ++ * ++ * \note This is a convenience wrapper for pe__find_active_on() where the count ++ * of all active nodes or only clean active nodes is desired according to ++ * the "requires" meta-attribute. 
++ */ ++pe_node_t * ++pe__find_active_requires(const resource_t *rsc, unsigned int *count) ++{ ++ if (rsc && is_not_set(rsc->flags, pe_rsc_needs_fencing)) { ++ return pe__find_active_on(rsc, NULL, count); ++ } ++ return pe__find_active_on(rsc, count, NULL); ++} +-- +1.8.3.1 + + +From e752fcfa10ee68f8a8de48122ae0f73190ae30af Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 21 May 2018 09:36:00 -0500 +Subject: [PATCH 3/3] Fix: libpe_status: find active instances properly + according to requires + +If a resource has "requires" set to "nothing" or "quorum", that means we can +properly start it elsewhere, even if the node believed to be initially running +the resource is unclean and waiting to be fenced. + +Previously, if we did start the resource elsewhere before fencing completed, +the cluster would then consider the resource multiply active, and recover it. +Now, we don't consider such a resource multiply active if it's active on +only one clean node. + +Status displays still show the resource as started on the unclean node, to give +the administrator a better idea of the actual situation. However, the clean +node will be considered the "current" node. +--- + lib/pengine/native.c | 21 ++++++++-- + pengine/native.c | 107 +++++++++++++++++++++++++-------------------------- + 2 files changed, 70 insertions(+), 58 deletions(-) + +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index f6d1653..e01ef17 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -17,6 +17,21 @@ + #define VARIANT_NATIVE 1 + #include "./variant.h" + ++/*! 
++ * \internal ++ * \brief Check whether a resource is active on multiple nodes ++ */ ++static bool ++is_multiply_active(pe_resource_t *rsc) ++{ ++ unsigned int count = 0; ++ ++ if (rsc->variant == pe_native) { ++ pe__find_active_requires(rsc, &count); ++ } ++ return count > 1; ++} ++ + void + native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) + { +@@ -58,7 +73,7 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) + return; + } + +- if (rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { ++ if (is_multiply_active(rsc)) { + switch (rsc->recovery_type) { + case recovery_stop_only: + { +@@ -99,8 +114,8 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) + } + break; + } +- crm_debug("%s is active on %d nodes including %s: %s", +- rsc->id, g_list_length(rsc->running_on), node->details->uname, ++ crm_debug("%s is active on multiple nodes including %s: %s", ++ rsc->id, node->details->uname, + recovery2text(rsc->recovery_type)); + + } else { +diff --git a/pengine/native.c b/pengine/native.c +index e3e0c59..37ac2e4 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -1163,7 +1163,9 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) + gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? 
TRUE : FALSE; + + GListPtr gIter = NULL; +- int num_active_nodes = 0; ++ unsigned int num_all_active = 0; ++ unsigned int num_clean_active = 0; ++ bool multiply_active = FALSE; + enum rsc_role_e role = RSC_ROLE_UNKNOWN; + enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; + +@@ -1181,18 +1183,7 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) + pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, + role2text(rsc->role), role2text(rsc->next_role)); + +- if (rsc->running_on) { +- current = rsc->running_on->data; +- } +- +- for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { +- node_t *n = (node_t *) gIter->data; +- if (rsc->partial_migration_source && +- (n->details == rsc->partial_migration_source->details)) { +- current = rsc->partial_migration_source; +- } +- num_active_nodes++; +- } ++ current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); + + for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { + node_t *current = (node_t *) gIter->data; +@@ -1207,47 +1198,57 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) + } + } + +- if (num_active_nodes > 1) { ++ if ((num_all_active == 2) && (num_clean_active == 2) && chosen ++ && rsc->partial_migration_source && rsc->partial_migration_target ++ && (current->details == rsc->partial_migration_source->details) ++ && (chosen->details == rsc->partial_migration_target->details)) { + +- if (num_active_nodes == 2 +- && chosen +- && rsc->partial_migration_target +- && rsc->partial_migration_source +- && (current->details == rsc->partial_migration_source->details) +- && (chosen->details == rsc->partial_migration_target->details)) { +- /* Here the chosen node is still the migration target from a partial +- * migration. Attempt to continue the migration instead of recovering +- * by stopping the resource everywhere and starting it on a single node. 
*/ +- pe_rsc_trace(rsc, +- "Will attempt to continue with a partial migration to target %s from %s", +- rsc->partial_migration_target->details->id, +- rsc->partial_migration_source->details->id); +- } else { +- const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); +- const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); ++ /* The chosen node is still the migration target from a partial ++ * migration. Attempt to continue the migration instead of recovering ++ * by stopping the resource everywhere and starting it on a single node. ++ */ ++ pe_rsc_trace(rsc, ++ "Will attempt to continue with a partial migration to target %s from %s", ++ rsc->partial_migration_target->details->id, ++ rsc->partial_migration_source->details->id); ++ ++ } else if (is_not_set(rsc->flags, pe_rsc_needs_fencing)) { ++ /* If a resource has "requires" set to nothing or quorum, don't consider ++ * it active on unclean nodes (similar to how all resources behave when ++ * stonith-enabled is false). We can start such resources elsewhere ++ * before fencing completes, and if we considered the resource active on ++ * the failed node, we would attempt recovery for being active on ++ * multiple nodes. ++ */ ++ multiply_active = (num_clean_active > 1); ++ } else { ++ multiply_active = (num_all_active > 1); ++ } + +- if(rsc->partial_migration_target && rsc->partial_migration_source) { +- crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id, +- rsc->partial_migration_target->details->uname, +- rsc->partial_migration_source->details->uname); ++ if (multiply_active) { ++ if (rsc->partial_migration_target && rsc->partial_migration_source) { ++ // Migration was in progress, but we've chosen a different target ++ crm_notice("Resource %s can no longer migrate to %s. 
Stopping on %s too", ++ rsc->id, rsc->partial_migration_target->details->uname, ++ rsc->partial_migration_source->details->uname); + +- } else { +- pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", +- rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); +- crm_warn("See %s for more information.", +- "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); +- } +- +- if (rsc->recovery_type == recovery_stop_start) { +- need_stop = TRUE; +- } ++ } else { ++ // Resource was incorrectly multiply active ++ pe_proc_err("Resource %s is active on %u nodes (%s)", ++ rsc->id, num_all_active, ++ recovery2text(rsc->recovery_type)); ++ crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); ++ } + +- /* If by chance a partial migration is in process, +- * but the migration target is not chosen still, clear all +- * partial migration data. */ +- rsc->partial_migration_source = rsc->partial_migration_target = NULL; +- allow_migrate = FALSE; ++ if (rsc->recovery_type == recovery_stop_start) { ++ need_stop = TRUE; + } ++ ++ /* If by chance a partial migration is in process, but the migration ++ * target is not chosen still, clear all partial migration data. 
++ */ ++ rsc->partial_migration_source = rsc->partial_migration_target = NULL; ++ allow_migrate = FALSE; + } + + if (is_set(rsc->flags, pe_rsc_start_pending)) { +@@ -1339,7 +1341,7 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) + is_not_set(rsc->flags, pe_rsc_managed) || + is_set(rsc->flags, pe_rsc_failed) || + is_set(rsc->flags, pe_rsc_start_pending) || +- (current->details->unclean == TRUE) || ++ (current && current->details->unclean) || + rsc->next_role < RSC_ROLE_STARTED) { + + allow_migrate = FALSE; +@@ -2329,12 +2331,7 @@ LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) + + next = rsc->allocated_to; + if (rsc->running_on) { +- if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { +- current = rsc->partial_migration_source; +- } else { +- current = rsc->running_on->data; +- } +- ++ current = pe__current_node(rsc); + if (rsc->role == RSC_ROLE_STOPPED) { + /* + * This can occur when resources are being recovered +-- +1.8.3.1 + diff --git a/SOURCES/020-multiple-active.patch b/SOURCES/020-multiple-active.patch new file mode 100644 index 0000000..27eec76 --- /dev/null +++ b/SOURCES/020-multiple-active.patch @@ -0,0 +1,672 @@ +From 355461723733acc0f6f9d9cc1318c91ba2a0ae6c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 22 May 2018 15:55:14 -0500 +Subject: [PATCH] Fix: all: prefer appropriate node when multiply active + +--- + lib/pengine/container.c | 8 +++---- + lib/pengine/native.c | 8 +++---- + pengine/allocate.c | 20 ++++++++--------- + pengine/clone.c | 51 +++++++++++++++++++----------------------- + pengine/graph.c | 26 +++++++++++++--------- + pengine/native.c | 17 +++++++------- + pengine/notif.c | 2 +- + tools/crm_mon.c | 14 ++++-------- + tools/crm_resource.c | 21 +++++++++++------- + tools/crm_resource_print.c | 16 ++++++------- + tools/crm_resource_runtime.c | 53 ++++++++++++++++++++++---------------------- + 11 files changed, 112 insertions(+), 124 deletions(-) + +diff 
--git a/lib/pengine/container.c b/lib/pengine/container.c +index b5340bf..d82948a 100644 +--- a/lib/pengine/container.c ++++ b/lib/pengine/container.c +@@ -807,11 +807,11 @@ container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field) + } + + node = tuple->docker->allocated_to; +- if(node == NULL && tuple->docker->running_on) { ++ if (node == NULL) { + /* If it won't be running anywhere after the + * transition, go with where it's running now. + */ +- node = tuple->docker->running_on->data; ++ node = pe__current_node(tuple->docker); + } + + if(node == NULL) { +@@ -1289,9 +1289,7 @@ tuple_print(container_grouping_t * tuple, const char *pre_text, long options, vo + offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", tuple->ipaddr); + } + +- if (tuple->docker->running_on) { +- node = tuple->docker->running_on->data; +- } ++ node = pe__current_node(tuple->docker); + common_print(rsc, pre_text, buffer, node, options, print_data); + } + +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index e01ef17..eda0355 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -457,7 +457,7 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri + if (options & pe_print_rsconly) { + status_print("/>\n"); + /* do nothing */ +- } else if (g_list_length(rsc->running_on) > 0) { ++ } else if (rsc->running_on != NULL) { + GListPtr gIter = rsc->running_on; + + status_print(">\n"); +@@ -529,7 +529,7 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n + } else if (is_set(rsc->flags, pe_rsc_failed)) { + status_print(""); + +- } else if (rsc->variant == pe_native && g_list_length(rsc->running_on) == 0) { ++ } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) { + status_print(""); + + } else if (g_list_length(rsc->running_on) > 1) { +@@ -742,9 +742,7 @@ native_print(resource_t * rsc, const char *pre_text, long options, void *print_d + return; + } + +- if (rsc->running_on != 
NULL) { +- node = rsc->running_on->data; +- } ++ node = pe__current_node(rsc); + common_print(rsc, pre_text, rsc_printable_id(rsc), node, options, print_data); + } + +diff --git a/pengine/allocate.c b/pengine/allocate.c +index 724736c..427575b 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -1101,14 +1101,14 @@ sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) + r2_weight = -INFINITY; + + if (resource1->running_on) { +- r1_node = g_list_nth_data(resource1->running_on, 0); ++ r1_node = pe__current_node(resource1); + r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); + if (r1_node != NULL) { + r1_weight = r1_node->weight; + } + } + if (resource2->running_on) { +- r2_node = g_list_nth_data(resource2->running_on, 0); ++ r2_node = pe__current_node(resource2); + r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); + if (r2_node != NULL) { + r2_weight = r2_node->weight; +@@ -1925,10 +1925,7 @@ get_remote_node_state(pe_node_t *node) + remote_rsc = node->details->remote_rsc; + CRM_ASSERT(remote_rsc); + +- if(remote_rsc->running_on) { +- cluster_node = remote_rsc->running_on->data; +- } +- ++ cluster_node = pe__current_node(remote_rsc); + + /* If the cluster node the remote connection resource resides on + * is unclean or went offline, we can't process any operations +@@ -1989,11 +1986,14 @@ get_remote_node_state(pe_node_t *node) + return remote_state_alive; + } + ++/*! 
++ * \internal ++ * \brief Order actions on remote node relative to actions for the connection ++ */ + static void + apply_remote_ordering(action_t *action, pe_working_set_t *data_set) + { + resource_t *remote_rsc = NULL; +- node_t *cluster_node = NULL; + enum action_tasks task = text2task(action->task); + enum remote_connection_state state = get_remote_node_state(action->node); + +@@ -2009,10 +2009,6 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) + remote_rsc = action->node->details->remote_rsc; + CRM_ASSERT(remote_rsc); + +- if(remote_rsc->running_on) { +- cluster_node = remote_rsc->running_on->data; +- } +- + crm_trace("Order %s action %s relative to %s%s (state: %s)", + action->task, action->uuid, + is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", +@@ -2093,6 +2089,8 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) + pe_order_implies_then, data_set); + + } else { ++ node_t *cluster_node = pe__current_node(remote_rsc); ++ + if(task == monitor_rsc && state == remote_state_failed) { + /* We would only be here if we do not know the + * state of the resource on the remote node. 
+diff --git a/pengine/clone.c b/pengine/clone.c +index 3192412..1de2661 100644 +--- a/pengine/clone.c ++++ b/pengine/clone.c +@@ -69,6 +69,10 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) + int rc = 0; + node_t *node1 = NULL; + node_t *node2 = NULL; ++ node_t *current_node1 = NULL; ++ node_t *current_node2 = NULL; ++ unsigned int nnodes1 = 0; ++ unsigned int nnodes2 = 0; + + gboolean can1 = TRUE; + gboolean can2 = TRUE; +@@ -87,24 +91,22 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) + * - inactive instances + */ + +- if (resource1->running_on && resource2->running_on) { +- if (g_list_length(resource1->running_on) < g_list_length(resource2->running_on)) { ++ current_node1 = pe__find_active_on(resource1, &nnodes1, NULL); ++ current_node2 = pe__find_active_on(resource2, &nnodes2, NULL); ++ ++ if (nnodes1 && nnodes2) { ++ if (nnodes1 < nnodes2) { + crm_trace("%s < %s: running_on", resource1->id, resource2->id); + return -1; + +- } else if (g_list_length(resource1->running_on) > g_list_length(resource2->running_on)) { ++ } else if (nnodes1 > nnodes2) { + crm_trace("%s > %s: running_on", resource1->id, resource2->id); + return 1; + } + } + +- if (resource1->running_on) { +- node1 = resource1->running_on->data; +- } +- if (resource2->running_on) { +- node2 = resource2->running_on->data; +- } +- ++ node1 = current_node1; ++ node2 = current_node2; + if (node1) { + node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); + +@@ -216,10 +218,10 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) + GHashTable *hash2 = + g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); + +- n = node_copy(resource1->running_on->data); ++ n = node_copy(current_node1); + g_hash_table_insert(hash1, (gpointer) n->details->id, n); + +- n = node_copy(resource2->running_on->data); ++ n = node_copy(current_node2); + g_hash_table_insert(hash2, (gpointer) n->details->id, 
n); + + if(resource1->parent) { +@@ -267,11 +269,8 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) + } + + /* Current location score */ +- node1 = g_list_nth_data(resource1->running_on, 0); +- node1 = g_hash_table_lookup(hash1, node1->details->id); +- +- node2 = g_list_nth_data(resource2->running_on, 0); +- node2 = g_hash_table_lookup(hash2, node2->details->id); ++ node1 = g_hash_table_lookup(hash1, current_node1->details->id); ++ node2 = g_hash_table_lookup(hash2, current_node2->details->id); + + if (node1->weight < node2->weight) { + if (node1->weight < 0) { +@@ -295,12 +294,8 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) + list1 = g_hash_table_get_values(hash1); + list2 = g_hash_table_get_values(hash2); + +- list1 = +- g_list_sort_with_data(list1, sort_node_weight, +- g_list_nth_data(resource1->running_on, 0)); +- list2 = +- g_list_sort_with_data(list2, sort_node_weight, +- g_list_nth_data(resource2->running_on, 0)); ++ list1 = g_list_sort_with_data(list1, sort_node_weight, current_node1); ++ list2 = g_list_sort_with_data(list2, sort_node_weight, current_node2); + max = g_list_length(list1); + if (max < g_list_length(list2)) { + max = g_list_length(list2); +@@ -528,8 +523,8 @@ distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, + + if (child->running_on && is_set(child->flags, pe_rsc_provisional) + && is_not_set(child->flags, pe_rsc_failed)) { +- node_t *child_node = child->running_on->data; +- node_t *local_node = parent_node_instance(child, child->running_on->data); ++ node_t *child_node = pe__current_node(child); ++ node_t *local_node = parent_node_instance(child, child_node); + + pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", + child->id, child_node->details->uname, max - allocated, max); +@@ -556,9 +551,9 @@ distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, + for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { 
+ resource_t *child = (resource_t *) gIter->data; + +- if (g_list_length(child->running_on) > 0) { +- node_t *child_node = child->running_on->data; +- node_t *local_node = parent_node_instance(child, child->running_on->data); ++ if (child->running_on != NULL) { ++ node_t *child_node = pe__current_node(child); ++ node_t *local_node = parent_node_instance(child, child_node); + + if (local_node == NULL) { + crm_err("%s is running on %s which isn't allowed", +diff --git a/pengine/graph.c b/pengine/graph.c +index 6d4e4c7..236b278 100644 +--- a/pengine/graph.c ++++ b/pengine/graph.c +@@ -783,6 +783,7 @@ get_router_node(action_t *action) + node_t *began_on = NULL; + node_t *ended_on = NULL; + node_t *router_node = NULL; ++ bool partial_migration = FALSE; + + if (safe_str_eq(action->task, CRM_OP_FENCE) || is_remote_node(action->node) == FALSE) { + return NULL; +@@ -790,10 +791,13 @@ get_router_node(action_t *action) + + CRM_ASSERT(action->node->details->remote_rsc != NULL); + +- if (action->node->details->remote_rsc->running_on) { +- began_on = action->node->details->remote_rsc->running_on->data; +- } ++ began_on = pe__current_node(action->node->details->remote_rsc); + ended_on = action->node->details->remote_rsc->allocated_to; ++ if (action->node->details->remote_rsc ++ && (action->node->details->remote_rsc->container == NULL) ++ && action->node->details->remote_rsc->partial_migration_target) { ++ partial_migration = TRUE; ++ } + + /* if there is only one location to choose from, + * this is easy. Check for those conditions first */ +@@ -817,6 +821,10 @@ get_router_node(action_t *action) + * are all required before the remote rsc stop action can occur.) In + * this case, we know these actions have to be routed through the initial + * cluster node the connection resource lived on before the move takes place. 
++ * The exception is a partial migration of a (non-guest) remote ++ * connection resource; in that case, all actions (even these) will be ++ * ordered after the connection's pseudo-start on the migration target, ++ * so the target is the router node. + * + * 2. Everything else (start, promote, monitor, probe, refresh, clear failcount + * delete ....) must occur after the resource starts on the node it is +@@ -824,10 +832,10 @@ get_router_node(action_t *action) + */ + + /* 1. before connection rsc moves. */ +- if (safe_str_eq(action->task, "stop") || ++ if ((safe_str_eq(action->task, "stop") || + safe_str_eq(action->task, "demote") || + safe_str_eq(action->task, "migrate_from") || +- safe_str_eq(action->task, "migrate_to")) { ++ safe_str_eq(action->task, "migrate_to")) && !partial_migration) { + + router_node = began_on; + +@@ -1234,18 +1242,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) + case stopped_rsc: + case action_demote: + case action_demoted: +- if(action->node->details->remote_rsc->container->running_on) { +- host = action->node->details->remote_rsc->container->running_on->data; +- } ++ host = pe__current_node(action->node->details->remote_rsc->container); + break; + case start_rsc: + case started_rsc: + case monitor_rsc: + case action_promote: + case action_promoted: +- if(action->node->details->remote_rsc->container->allocated_to) { +- host = action->node->details->remote_rsc->container->allocated_to; +- } ++ host = action->node->details->remote_rsc->container->allocated_to; + break; + default: + break; +diff --git a/pengine/native.c b/pengine/native.c +index 37ac2e4..1c26642 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -102,7 +102,7 @@ native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_se + if (length > 0) { + nodes = g_hash_table_get_values(rsc->allowed_nodes); + nodes = g_list_sort_with_data(nodes, sort_node_weight, +- g_list_nth_data(rsc->running_on, 0)); ++ 
pe__current_node(rsc)); + + // First node in sorted list has the best score + best = g_list_nth_data(nodes, 0); +@@ -158,7 +158,7 @@ native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_se + * remaining unallocated instances to prefer a node that's already + * running another instance. + */ +- node_t *running = g_list_nth_data(rsc->running_on, 0); ++ node_t *running = pe__current_node(rsc); + + if (running && (can_run_resources(running) == FALSE)) { + pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", +@@ -534,16 +534,14 @@ native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) + node_t *assign_to = NULL; + + rsc->next_role = rsc->role; +- if (rsc->running_on == NULL) { ++ assign_to = pe__current_node(rsc); ++ if (assign_to == NULL) { + reason = "inactive"; + } else if (rsc->role == RSC_ROLE_MASTER) { +- assign_to = rsc->running_on->data; + reason = "master"; + } else if (is_set(rsc->flags, pe_rsc_failed)) { +- assign_to = rsc->running_on->data; + reason = "failed"; + } else { +- assign_to = rsc->running_on->data; + reason = "active"; + } + pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, +@@ -1834,7 +1832,9 @@ rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working + rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, + role2text(rsc_ticket->role_lh)); + +- if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { ++ if ((rsc_ticket->ticket->granted == FALSE) ++ && (rsc_lh->running_on != NULL)) { ++ + GListPtr gIter = NULL; + + switch (rsc_ticket->loss_policy) { +@@ -1867,7 +1867,7 @@ rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working + if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { + return; + } +- if (g_list_length(rsc_lh->running_on) > 0) { ++ if (rsc_lh->running_on != NULL) { + clear_bit(rsc_lh->flags, pe_rsc_managed); + set_bit(rsc_lh->flags, pe_rsc_block); + } +@@ -1919,7 +1919,6 @@ 
native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_ + } else if ((then_rsc_role >= RSC_ROLE_STARTED) + && safe_str_eq(then->task, RSC_START) + && then->node +- && then_rsc->running_on + && g_list_length(then_rsc->running_on) == 1 + && then->node->details == ((node_t *) then_rsc->running_on->data)->details) { + /* ignore... if 'then' is supposed to be started after 'first', but +diff --git a/pengine/notif.c b/pengine/notif.c +index 3013ee0..4913249 100644 +--- a/pengine/notif.c ++++ b/pengine/notif.c +@@ -113,7 +113,7 @@ expand_node_list(GListPtr list, char **uname, char **metal) + if(node->details->remote_rsc + && node->details->remote_rsc->container + && node->details->remote_rsc->container->running_on) { +- node = node->details->remote_rsc->container->running_on->data; ++ node = pe__current_node(node->details->remote_rsc->container); + } + + if (node->details->uname == NULL) { +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index 824b12f..7c63803 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -1953,16 +1953,10 @@ get_node_display_name(node_t *node) + + /* Host is displayed only if this is a guest node */ + if (is_container_remote_node(node)) { +- if (node->details->remote_rsc->running_on) { +- /* running_on is a list, but guest nodes will have exactly one entry +- * unless they are in the process of migrating, in which case they +- * will have two; either way, we can use the first item in the list +- */ +- node_t *host_node = (node_t *) node->details->remote_rsc->running_on->data; +- +- if (host_node && host_node->details) { +- node_host = host_node->details->uname; +- } ++ node_t *host_node = pe__current_node(node->details->remote_rsc); ++ ++ if (host_node && host_node->details) { ++ node_host = host_node->details->uname; + } + if (node_host == NULL) { + node_host = ""; /* so we at least get "uname@" to indicate guest */ +diff --git a/tools/crm_resource.c b/tools/crm_resource.c +index c64432e..0557892 100644 +--- 
a/tools/crm_resource.c ++++ b/tools/crm_resource.c +@@ -1015,23 +1015,27 @@ main(int argc, char **argv) + rc = cli_resource_ban(rsc_id, dest->details->uname, NULL, cib_conn); + + } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { ++ pe_node_t *current = NULL; ++ unsigned int nactive = 0; ++ + rc = -EINVAL; +- if (g_list_length(rsc->running_on) == 1) { +- node_t *current = rsc->running_on->data; ++ current = pe__find_active_requires(rsc, &nactive); ++ ++ if (nactive == 1) { + rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); + + } else if(rsc->variant == pe_master) { + int count = 0; + GListPtr iter = NULL; +- node_t *current = NULL; + ++ current = NULL; + for(iter = rsc->children; iter; iter = iter->next) { + resource_t *child = (resource_t *)iter->data; + enum rsc_role_e child_role = child->fns->state(child, TRUE); + + if(child_role == RSC_ROLE_MASTER) { + count++; +- current = child->running_on->data; ++ current = pe__current_node(child); + } + } + +@@ -1039,14 +1043,15 @@ main(int argc, char **argv) + rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); + + } else { +- CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).", rsc_id, g_list_length(rsc->running_on), count); ++ CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).", ++ rsc_id, nactive, count); + CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --node ", rsc_id); + CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" + " --ban --master --node ", rsc_id); + } + + } else { +- CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, g_list_length(rsc->running_on)); ++ CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, nactive); + CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --node ", rsc_id); + } + +@@ -1164,12 +1169,12 @@ main(int argc, char **argv) + node_t *node = pe_find_node(data_set.nodes, host_uname); + 
+ if (node && is_remote_node(node)) { +- if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { ++ node = pe__current_node(node->details->remote_rsc); ++ if (node == NULL) { + CMD_ERR("No lrmd connection detected to remote node %s", host_uname); + rc = -ENXIO; + goto bail; + } +- node = node->details->remote_rsc->running_on->data; + router_node = node->details->uname; + attr_options |= attrd_opt_remote; + } +diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c +index d066c42..2463fb5 100644 +--- a/tools/crm_resource_print.c ++++ b/tools/crm_resource_print.c +@@ -68,6 +68,7 @@ cli_resource_print_cts(resource_t * rsc) + const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); + const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); + const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); ++ pe_node_t *node = pe__current_node(rsc); + + if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { + xmlNode *op = NULL; +@@ -90,10 +91,8 @@ cli_resource_print_cts(resource_t * rsc) + } + } + +- if (rsc->running_on != NULL && g_list_length(rsc->running_on) == 1) { +- node_t *tmp = rsc->running_on->data; +- +- host = tmp->details->uname; ++ if (node != NULL) { ++ host = node->details->uname; + } + + printf("Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx\n", +@@ -315,16 +314,15 @@ int + cli_resource_print_attribute(resource_t *rsc, const char *attr, pe_working_set_t * data_set) + { + int rc = -ENXIO; +- node_t *current = NULL; ++ unsigned int count = 0; + GHashTable *params = NULL; + const char *value = NULL; ++ node_t *current = pe__find_active_on(rsc, &count, NULL); + +- if (g_list_length(rsc->running_on) == 1) { +- current = rsc->running_on->data; +- +- } else if (g_list_length(rsc->running_on) > 1) { ++ if (count > 1) { + CMD_ERR("%s is active on more than one node," + " returning the default value for %s", rsc->id, crm_str(attr)); ++ current = NULL; + } + + params = 
crm_str_table_new(); +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 5e54f9e..5004935 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -473,11 +473,11 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, + node_t *node = pe_find_node(data_set->nodes, host_uname); + + if (node && is_remote_node(node)) { +- if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { ++ node = pe__current_node(node->details->remote_rsc); ++ if (node == NULL) { + CMD_ERR("No lrmd connection detected to remote node %s", host_uname); + return -ENXIO; + } +- node = node->details->remote_rsc->running_on->data; + router_node = node->details->uname; + } + } +@@ -1648,11 +1648,16 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, + cib_t *cib, pe_working_set_t *data_set) + { + int rc = -EINVAL; +- int count = 0; ++ unsigned int count = 0; + node_t *current = NULL; + node_t *dest = pe_find_node(data_set->nodes, host_name); + bool cur_is_dest = FALSE; + ++ if (dest == NULL) { ++ CMD_ERR("Error performing operation: node '%s' is unknown", host_name); ++ return -ENXIO; ++ } ++ + if (scope_master && rsc->variant != pe_master) { + resource_t *p = uber_parent(rsc); + if(p->variant == pe_master) { +@@ -1667,8 +1672,12 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, + } + } + ++ current = pe__find_active_requires(rsc, &count); ++ + if(rsc->variant == pe_master) { + GListPtr iter = NULL; ++ unsigned int master_count = 0; ++ pe_node_t *master_node = NULL; + + for(iter = rsc->children; iter; iter = iter->next) { + resource_t *child = (resource_t *)iter->data; +@@ -1676,37 +1685,27 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, + + if(child_role == RSC_ROLE_MASTER) { + rsc = child; +- count++; ++ master_node = pe__current_node(child); ++ master_count++; + } + } +- +- if(scope_master == FALSE && count == 0) 
{ +- count = g_list_length(rsc->running_on); ++ if (scope_master || master_count) { ++ count = master_count; ++ current = master_node; + } + +- } else if (pe_rsc_is_clone(rsc)) { +- count = g_list_length(rsc->running_on); +- +- } else if (g_list_length(rsc->running_on) > 1) { +- CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); +- return rc; +- } +- +- if(dest == NULL) { +- CMD_ERR("Error performing operation: node '%s' is unknown", host_name); +- return -ENXIO; + } + +- if(g_list_length(rsc->running_on) == 1) { +- current = rsc->running_on->data; ++ if (count > 1) { ++ if (pe_rsc_is_clone(rsc)) { ++ current = NULL; ++ } else { ++ CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); ++ return rc; ++ } + } + +- if(current == NULL) { +- /* Nothing to check */ +- +- } else if(scope_master && rsc->fns->state(rsc, TRUE) != RSC_ROLE_MASTER) { +- crm_trace("%s is already active on %s but not in correct state", rsc_id, dest->details->uname); +- } else if (safe_str_eq(current->details->uname, dest->details->uname)) { ++ if (current && (current->details == dest->details)) { + cur_is_dest = TRUE; + if (do_force) { + crm_info("%s is already %s on %s, reinforcing placement with location constraint.", +@@ -1736,7 +1735,7 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, + (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); + + } else if(count > 1) { +- CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move one to %s", ++ CMD_ERR("Resource '%s' is currently %s in %d locations. 
One may now move to %s", + rsc_id, scope_master?"promoted":"active", count, dest->details->uname); + CMD_ERR("You can prevent '%s' from being %s at a specific location with:" + " --ban %s--host ", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); +-- +1.8.3.1 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 1b78478..18acc4e 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -163,7 +163,7 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist}.2 +Release: %{pcmk_release}%{?dist}.3 %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else @@ -196,6 +196,9 @@ Patch14: 014-segfault.patch Patch15: 015-fail-timeout.patch Patch16: 016-crm_diff.patch Patch17: 017-pending-notify.patch +Patch18: 018-node-names.patch +Patch19: 019-requires-quorum.patch +Patch20: 020-multiple-active.patch # patches that aren't from upstream Patch100: lrmd-protocol-version.patch @@ -868,6 +871,12 @@ exit 0 %attr(0644,root,root) %{_datadir}/pacemaker/nagios/plugins-metadata/* %changelog +* Tue Jun 5 2018 Ken Gaillot - 1.1.18-11.3 +- Fix regression in handling of mixed-case node names +- Avoid multiple recovery of stonith devices +- Resolves: rhbz#1583747 +- Resolves: rhbz#1585741 + * Fri Apr 20 2018 Ken Gaillot - 1.1.18-11.2 - Do not record pending notify actions as completed - Resolves: rhbz#1570618