diff --git a/.gitignore b/.gitignore index 1412d55..d90e995 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +SOURCES/016-regression-tests.patch.gz SOURCES/nagios-agents-metadata-105ab8a.tar.gz SOURCES/pacemaker-c3c624e.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index 4ff0e33..77a2d9d 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,3 @@ +3b251742f2e6ef37faae17e905612a43635b07ae SOURCES/016-regression-tests.patch.gz ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz 572f66e455beeb43106974ed547118a26834d099 SOURCES/pacemaker-c3c624e.tar.gz diff --git a/SOURCES/012-stonith-ordering.patch b/SOURCES/012-stonith-ordering.patch new file mode 100644 index 0000000..7c80cec --- /dev/null +++ b/SOURCES/012-stonith-ordering.patch @@ -0,0 +1,108 @@ +From eb2854add713f22b083a54aa7caf04be5067b469 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Nov 2018 18:05:04 -0600 +Subject: [PATCH] Low: scheduler: order start after particular stonith op + +Previously, if a resource's status was unknown on a node about to be fenced, +we ordered the resource start after all_stopped. This approximated stonith_done +before that was available. However, it makes more sense to order the start +after the particular stonith op for the node in question. + +This improves recovery when multiple nodes are being fenced: resources can now +be recovered from one node when it is successfully fenced, even if the fencing +of another node fails. +--- + pengine/native.c | 63 +++++++++++++++++++++++++++++++------------------------- + 1 file changed, 35 insertions(+), 28 deletions(-) + +diff --git a/pengine/native.c b/pengine/native.c +index c6c1d55..9ee5990 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -2948,13 +2948,19 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, + return TRUE; + } + ++/*! 
++ * \internal ++ * \brief Order a resource's start and promote actions relative to fencing ++ * ++ * \param[in] rsc Resource to be ordered ++ * \param[in] stonith_op Fence action ++ * \param[in] data_set Cluster information ++ */ + static void + native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) + { + node_t *target; + GListPtr gIter = NULL; +- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); +- action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); + + CRM_CHECK(stonith_op && stonith_op->node, return); + target = stonith_op->node; +@@ -2962,34 +2968,35 @@ native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set + for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { + action_t *action = (action_t *) gIter->data; + +- if(action->needs == rsc_req_nothing) { +- /* Anything other than start or promote requires nothing */ +- +- } else if (action->needs == rsc_req_stonith) { +- order_actions(stonith_done, action, pe_order_optional); ++ switch (action->needs) { ++ case rsc_req_nothing: ++ // Anything other than start or promote requires nothing ++ break; + +- } else if (safe_str_eq(action->task, RSC_START) +- && NULL != pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) +- && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { +- /* if known == NULL, then we don't know if +- * the resource is active on the node +- * we're about to shoot +- * +- * in this case, regardless of action->needs, +- * the only safe option is to wait until +- * the node is shot before doing anything +- * to with the resource +- * +- * it's analogous to waiting for all the probes +- * for rscX to complete before starting rscX +- * +- * the most likely explanation is that the +- * DC died and took its status with it +- */ ++ case rsc_req_stonith: ++ order_actions(stonith_op, action, pe_order_optional); ++ break; + +- pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, +- target->details->uname); +- order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); ++ case rsc_req_quorum: ++ if (safe_str_eq(action->task, RSC_START) ++ && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) ++ && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { ++ ++ /* If we don't know the status of the resource on the node ++ * we're about to shoot, we have to assume it may be active ++ * there. Order the resource start after the fencing. This ++ * is analogous to waiting for all the probes for a resource ++ * to complete before starting it. ++ * ++ * The most likely explanation is that the DC died and took ++ * its status with it. 
++ */ ++ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, ++ target->details->uname); ++ order_actions(stonith_op, action, ++ pe_order_optional | pe_order_runnable_left); ++ } ++ break; + } + } + } +-- +1.8.3.1 + diff --git a/SOURCES/013-pseudo-removal.patch b/SOURCES/013-pseudo-removal.patch new file mode 100644 index 0000000..0812570 --- /dev/null +++ b/SOURCES/013-pseudo-removal.patch @@ -0,0 +1,225 @@ +From dcc8e65891537cfdffb1b18e1412b12868d20241 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 16 Nov 2018 21:02:13 -0600 +Subject: [PATCH 1/2] Low: scheduler: get rid of now-unused all_stopped + pseudo-action + +--- + lib/pengine/common.c | 2 -- + pengine/allocate.c | 3 --- + pengine/native.c | 8 -------- + pengine/notif.c | 7 ------- + pengine/utils.h | 1 - + 5 files changed, 21 deletions(-) + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index c54bc44..d04e4ae 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -280,8 +280,6 @@ text2task(const char *task) + return no_action; + } else if (safe_str_eq(task, "stonith_complete")) { + return no_action; +- } else if (safe_str_eq(task, "all_stopped")) { +- return no_action; + } + crm_trace("Unsupported action: %s", task); + #endif +diff --git a/pengine/allocate.c b/pengine/allocate.c +index adc07d8..81f3f51 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -1556,7 +1556,6 @@ stage6(pe_working_set_t * data_set) + action_t *stonith_op = NULL; + action_t *last_stonith = NULL; + gboolean integrity_lost = FALSE; +- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); + action_t *done = get_pseudo_op(STONITH_DONE, data_set); + gboolean need_stonith = TRUE; + GListPtr gIter; +@@ -1706,8 +1705,6 @@ stage6(pe_working_set_t * data_set) + order_actions(last_stonith, done, pe_order_implies_then); + } + +- order_actions(done, all_stopped, pe_order_implies_then); +- + g_list_free(stonith_ops); + return TRUE; + } +diff --git a/pengine/native.c b/pengine/native.c +index 9ee5990..bd0b7d0 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -1430,14 +1430,6 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) + return; + } + +- { +- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); +- +- custom_action_order(rsc, stop_key(rsc), NULL, +- NULL, strdup(all_stopped->task), all_stopped, +- pe_order_implies_then | pe_order_runnable_left, data_set); +- } +- + if (g_hash_table_size(rsc->utilization) > 0 + && safe_str_neq(data_set->placement_strategy, "default")) { + GHashTableIter iter; +diff --git a/pengine/notif.c b/pengine/notif.c +index cdc382d..b333e5c 100644 +--- a/pengine/notif.c ++++ b/pengine/notif.c +@@ -411,13 +411,6 @@ create_notification_boundaries(resource_t * rsc, const char *action, action_t * + if (start && end) { + order_actions(n_data->pre_done, n_data->post, pe_order_optional); + } +- +- if (safe_str_eq(action, RSC_STOP)) { +- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); +- +- order_actions(n_data->post_done, all_stopped, pe_order_optional); +- } +- + return n_data; + } + +diff --git a/pengine/utils.h b/pengine/utils.h +index 04ee36b..aee7708 100644 +--- a/pengine/utils.h ++++ b/pengine/utils.h +@@ -66,7 +66,6 @@ pe_action_t *create_pseudo_resource_op(resource_t * rsc, const char *task, bool + + # define STONITH_UP "stonith_up" + # define STONITH_DONE "stonith_complete" +-# define ALL_STOPPED "all_stopped" + # define LOAD_STOPPED "load_stopped" + + #endif +-- +1.8.3.1 + + +From 811e6291f18b11471d8b4a98b0079de8f6b00091 
Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 16 Nov 2018 21:08:31 -0600 +Subject: [PATCH 2/2] Low: scheduler: get rid of now-unused stonith_complete + pseudo-action + +also last reference to long-gone stonith_up pseudo-action +--- + lib/pengine/common.c | 4 ---- + pengine/allocate.c | 21 ++------------------- + pengine/utils.h | 2 -- + 3 files changed, 2 insertions(+), 25 deletions(-) + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index d04e4ae..d03a6aa 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -276,10 +276,6 @@ text2task(const char *task) + return no_action; + } else if (safe_str_eq(task, "fail")) { + return no_action; +- } else if (safe_str_eq(task, "stonith_up")) { +- return no_action; +- } else if (safe_str_eq(task, "stonith_complete")) { +- return no_action; + } + crm_trace("Unsupported action: %s", task); + #endif +diff --git a/pengine/allocate.c b/pengine/allocate.c +index 81f3f51..0ee8bb0 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -1459,11 +1459,10 @@ any_managed_resources(pe_working_set_t * data_set) + * \brief Create pseudo-op for guest node fence, and order relative to it + * + * \param[in] node Guest node to fence +- * \param[in] done STONITH_DONE operation + * \param[in] data_set Working set of CIB state + */ + static void +-fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) ++fence_guest(pe_node_t *node, pe_working_set_t *data_set) + { + resource_t *container = node->details->remote_rsc->container; + pe_action_t *stop = NULL; +@@ -1540,9 +1539,6 @@ fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) + + /* Order/imply other actions relative to pseudo-fence as with real fence */ + stonith_constraints(node, stonith_op, data_set); +- if(done) { +- order_actions(stonith_op, done, pe_order_implies_then); +- } + } + + /* +@@ -1552,11 +1548,9 @@ gboolean + stage6(pe_working_set_t * data_set) + { + action_t *dc_down = NULL; +- action_t *dc_fence = NULL; + action_t *stonith_op = NULL; + action_t *last_stonith = NULL; + gboolean integrity_lost = FALSE; +- action_t *done = get_pseudo_op(STONITH_DONE, data_set); + gboolean need_stonith = TRUE; + GListPtr gIter; + GListPtr stonith_ops = NULL; +@@ -1587,7 +1581,7 @@ stage6(pe_working_set_t * data_set) + */ + if (is_container_remote_node(node)) { + if (node->details->remote_requires_reset && need_stonith) { +- fence_guest(node, done, data_set); ++ fence_guest(node, data_set); + } + continue; + } +@@ -1604,7 +1598,6 @@ stage6(pe_working_set_t * data_set) + + if (node->details->is_dc) { + dc_down = stonith_op; +- dc_fence = stonith_op; + + } else if (is_set(data_set->flags, pe_flag_concurrent_fencing) == FALSE) { + if (last_stonith) { +@@ -1613,7 +1606,6 @@ stage6(pe_working_set_t * data_set) + last_stonith = stonith_op; + + } else { +- order_actions(stonith_op, done, pe_order_implies_then); + stonith_ops = g_list_append(stonith_ops, stonith_op); + } + +@@ -1696,15 +1688,6 @@ stage6(pe_working_set_t * data_set) + } + } + } +- +- +- if (dc_fence) { +- order_actions(dc_down, done, pe_order_implies_then); +- +- } else if (last_stonith) { +- order_actions(last_stonith, done, pe_order_implies_then); +- } +- + g_list_free(stonith_ops); + return TRUE; + } +diff --git a/pengine/utils.h b/pengine/utils.h +index aee7708..0e81cb3 100644 +--- a/pengine/utils.h ++++ b/pengine/utils.h +@@ -64,8 +64,6 @@ extern void calculate_utilization(GHashTable * current_utilization, + extern void process_utilization(resource_t * rsc, node_t ** prefer, 
pe_working_set_t * data_set); + pe_action_t *create_pseudo_resource_op(resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set); + +-# define STONITH_UP "stonith_up" +-# define STONITH_DONE "stonith_complete" + # define LOAD_STOPPED "load_stopped" + + #endif +-- +1.8.3.1 + diff --git a/SOURCES/014-cli-test.patch b/SOURCES/014-cli-test.patch new file mode 100644 index 0000000..960170a --- /dev/null +++ b/SOURCES/014-cli-test.patch @@ -0,0 +1,32 @@ +From b47749de4916c6090d0e1593139553a84be97db1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 30 Nov 2018 16:39:53 -0600 +Subject: [PATCH] Test: cts-cli: update regression test for all_stopped removal + +--- + tools/regression.tools.exp | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/tools/regression.tools.exp b/tools/regression.tools.exp +index ce9f352..f5a2a42 100644 +--- a/tools/regression.tools.exp ++++ b/tools/regression.tools.exp +@@ -1805,7 +1805,6 @@ Executing cluster transition: + * Resource action: Fence stop on node1 + * Resource action: Fence monitor on node3 + * Resource action: Fence monitor on node2 +- * Pseudo action: all_stopped + * Resource action: Fence start on node2 + + Revised cluster status: +@@ -2074,7 +2073,6 @@ Transition Summary: + + Executing cluster transition: + * Resource action: dummy stop on node1 +- * Pseudo action: all_stopped + * Resource action: dummy start on node3 + + Revised cluster status: +-- +1.8.3.1 + diff --git a/SOURCES/015-remote-ordering.patch b/SOURCES/015-remote-ordering.patch new file mode 100644 index 0000000..f36418e --- /dev/null +++ b/SOURCES/015-remote-ordering.patch @@ -0,0 +1,51 @@ +From e4dae772074c964e10da59e2678f329c9c8a3bf1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Nov 2018 17:51:30 -0600 +Subject: [PATCH] Fix: scheduler: order resource moves after connection starts + +This addresses a regression in behavior since 1.1.18 (via 3a34fed). By allowing +stops to proceed before probes finished, that change allowed the stop of a +resource moving to a coming-up remote node to happen before the remote node +connection's start. If the remote connection start fails, the resource will +have to be started again where it was, leading to unnecessary downtime. + +Now, order the resource's stop after the remote connection's start. + +RHBZ#1648507 +--- + pengine/allocate.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/pengine/allocate.c b/pengine/allocate.c +index 0ee8bb0..126ba90 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -2224,6 +2224,25 @@ apply_remote_node_ordering(pe_working_set_t *data_set) + continue; + } + ++ /* Another special case: if a resource is moving to a Pacemaker Remote ++ * node, order the stop on the original node after any start of the ++ * remote connection. This ensures that if the connection fails to ++ * start, we leave the resource running on the original node. ++ */ ++ if (safe_str_eq(action->task, RSC_START)) { ++ for (GList *item = action->rsc->actions; item != NULL; ++ item = item->next) { ++ pe_action_t *rsc_action = item->data; ++ ++ if ((rsc_action->node->details != action->node->details) ++ && safe_str_eq(rsc_action->task, RSC_STOP)) { ++ custom_action_order(remote, start_key(remote), NULL, ++ action->rsc, NULL, rsc_action, ++ pe_order_optional, data_set); ++ } ++ } ++ } ++ + /* The action occurs across a remote connection, so create + * ordering constraints that guarantee the action occurs while the node + * is active (after start, before stop ... 
things like that). +-- +1.8.3.1 + diff --git a/SOURCES/017-cleanup-pending-op.patch b/SOURCES/017-cleanup-pending-op.patch new file mode 100644 index 0000000..94cef2c --- /dev/null +++ b/SOURCES/017-cleanup-pending-op.patch @@ -0,0 +1,150 @@ +From a48d73f23c6800ae51522c1a91d0f0e1eb967078 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 8 Jan 2019 15:31:14 -0600 +Subject: [PATCH] Fix: controller: directly acknowledge unrecordable operation + results + +Regression introduced in 2.0.1-rc1 by 0363985dd + +Before that commit, if an operation result arrived when there was no resource +information available, a warning would be logged and the operation would be +directly acknowledged. This could occur, for example, if resource history were +cleaned while an operation was pending on that resource. + +After that commit, in that situation, an assertion and error would be logged, +and no acknowledgement would be sent, leading to a transition timeout. + +Restore the direct ack. Also improve related log messages. +--- + crmd/lrm.c | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 55 insertions(+), 25 deletions(-) + +diff --git a/crmd/lrm.c b/crmd/lrm.c +index 0d64f59..51cb50b 100644 +--- a/crmd/lrm.c ++++ b/crmd/lrm.c +@@ -2497,6 +2497,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + int update_id = 0; + gboolean remove = FALSE; + gboolean removed = FALSE; ++ bool need_direct_ack = FALSE; + lrmd_rsc_info_t *rsc = NULL; + const char *node_name = NULL; + +@@ -2527,7 +2528,6 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + op_key, op->rsc_id); + } + } +- CRM_LOG_ASSERT(rsc != NULL); // If it's still NULL, there's a bug somewhere + + // Get node name if available (from executor state or action XML) + if (lrm_state) { +@@ -2559,51 +2559,81 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + } + + if (op->op_status != PCMK_LRM_OP_CANCELLED) { ++ /* We might not record the result, so directly acknowledge it to the ++ * originator instead, so it doesn't time out waiting for the result ++ * (especially important if part of a transition). ++ */ ++ need_direct_ack = TRUE; ++ + if (controld_action_is_recordable(op->op_type)) { + if (node_name && rsc) { ++ // We should record the result, and happily, we can + update_id = do_update_resource(node_name, rsc, op); ++ need_direct_ack = FALSE; ++ ++ } else if (op->rsc_deleted) { ++ /* We shouldn't record the result (likely the resource was ++ * refreshed, cleaned, or removed while this operation was ++ * in flight). ++ */ ++ crm_notice("Not recording %s result in CIB because " ++ "resource information was removed since it was initiated", ++ op_key); + } else { +- // @TODO Should we direct ack? +- crm_err("Unable to record %s result in CIB: %s", +- op_key, ++ /* This shouldn't be possible; the executor didn't consider the ++ * resource deleted, but we couldn't find resource or node ++ * information. ++ */ ++ crm_err("Unable to record %s result in CIB: %s", op_key, + (node_name? "No resource information" : "No node name")); + } +- } else { +- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); + } ++ + } else if (op->interval == 0) { +- /* This will occur when "crm resource cleanup" is called while actions are in-flight */ +- crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); +- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); ++ /* A non-recurring operation was cancelled. 
Most likely, the ++ * never-initiated action was removed from the executor's pending ++ * operations list upon resource removal. ++ */ ++ need_direct_ack = TRUE; + + } else if (pending == NULL) { +- /* We don't need to do anything for cancelled ops +- * that are not in our pending op list. There are no +- * transition actions waiting on these operations. */ ++ /* This recurring operation was cancelled, but was not pending. No ++ * transition actions are waiting on it, nothing needs to be done. ++ */ + + } else if (op->user_data == NULL) { +- /* At this point we have a pending entry, but no transition +- * key present in the user_data field. report this */ +- crm_err("Op %s (call=%d): No user data", op_key, op->call_id); ++ /* This recurring operation was cancelled and pending, but we don't ++ * have a transition key. This should never happen. ++ */ ++ crm_err("Recurring operation %s was cancelled without transition information", ++ op_key); + + } else if (pending->remove) { +- /* The tengine canceled this op, we have been waiting for the cancel to finish. */ ++ /* This recurring operation was cancelled (by us) and pending, and we ++ * have been waiting for it to finish. ++ */ + if (lrm_state) { + erase_lrm_history_by_op(lrm_state, op); + } + + } else if (op->rsc_deleted) { +- /* The tengine initiated this op, but it was cancelled outside of the +- * tengine's control during a resource cleanup/re-probe request. The tengine +- * must be alerted that this operation completed, otherwise the tengine +- * will continue waiting for this update to occur until it is timed out. +- * We don't want this update going to the cib though, so use a direct ack. */ +- crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); +- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); ++ /* This recurring operation was cancelled (but not by us, and the ++ * executor does not have resource information, likely due to resource ++ * cleanup, refresh, or removal) and pending. ++ */ ++ crm_debug("Recurring op %s was cancelled due to resource deletion", ++ op_key); ++ need_direct_ack = TRUE; + + } else { +- /* Before a stop is called, no need to direct ack */ +- crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); ++ /* This recurring operation was cancelled (but not by us, likely by the ++ * executor before stopping the resource) and pending. We don't need to ++ * do anything special. 
++ */ ++ } ++ ++ if (need_direct_ack) { ++ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); + } + + if(remove == FALSE) { +-- +1.8.3.1 + diff --git a/SOURCES/2.0-cleanup-behavior.patch b/SOURCES/2.0-cleanup-behavior.patch index bd547d7..c0e13c9 100644 --- a/SOURCES/2.0-cleanup-behavior.patch +++ b/SOURCES/2.0-cleanup-behavior.patch @@ -4,11 +4,11 @@ Date: Tue, 7 Aug 2018 11:56:06 -0500 Subject: [PATCH] Feature: tools: enable 2.0 behavior of crm_resource clean-up --- - tools/crm_resource.c | 15 +++++---------- - 1 file changed, 5 insertions(+), 10 deletions(-) + tools/crm_resource.c | 49 +++++---------- + 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 5b20873..479c69d 100644 +index 128d075..bbdba25 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -212,8 +212,6 @@ static struct crm_option long_options[] = { @@ -71,6 +71,47 @@ index 5b20873..479c69d 100644 find_flags = pe_find_renamed|pe_find_anon; break; +@@ -1120,21 +1115,25 @@ main(int argc, char **argv) + start_mainloop(); + } + +- } else if ((rsc_cmd == 'R') && rsc) { +- if (do_force == FALSE) { +- rsc = uber_parent(rsc); +- } +- crmd_replies_needed = 0; +- +- crm_debug("Re-checking the state of %s (%s requested) on %s", +- rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); +- rc = cli_resource_delete(crmd_channel, host_uname, rsc, +- NULL, 0, FALSE, &data_set); +- +- if ((rc == pcmk_ok) && !BE_QUIET) { +- // Show any reasons why resource might stay stopped +- cli_resource_check(cib_conn, rsc); +- } ++ } else if ((rsc_cmd == 'R') && rsc) { ++ if (do_force == FALSE) { ++ rsc = uber_parent(rsc); ++ } ++ crmd_replies_needed = 0; ++ ++ crm_debug("Re-checking the state of %s (%s requested) on %s", ++ rsc->id, rsc_id, (host_uname? 
host_uname: "all nodes")); ++ rc = cli_resource_delete(crmd_channel, host_uname, rsc, ++ NULL, 0, FALSE, &data_set); ++ ++ if ((rc == pcmk_ok) && !BE_QUIET) { ++ // Show any reasons why resource might stay stopped ++ cli_resource_check(cib_conn, rsc); ++ } ++ ++ if (rc == pcmk_ok) { ++ start_mainloop(); ++ } + + } else if (rsc_cmd == 'R') { + #if HAVE_ATOMIC_ATTRD -- 1.8.3.1 diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index b4ed85d..fa6d2f2 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -160,7 +160,7 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist}.2 +Release: %{pcmk_release}%{?dist}.4 %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else @@ -187,6 +187,12 @@ Patch8: 008-bundle-ordering.patch Patch9: 009-sbd-guest.patch Patch10: 010-route-notify.patch Patch11: 011-notifs.patch +Patch12: 012-stonith-ordering.patch +Patch13: 013-pseudo-removal.patch +Patch14: 014-cli-test.patch +Patch15: 015-remote-ordering.patch +Patch16: 016-regression-tests.patch.gz +Patch17: 017-cleanup-pending-op.patch # patches that aren't from upstream Patch100: lrmd-protocol-version.patch @@ -871,6 +877,16 @@ exit 0 %attr(0644,root,root) %{_datadir}/pacemaker/nagios/plugins-metadata/* %changelog +* Mon Jan 14 2019 Ken Gaillot - 1.1.19-8.4 +- Fix regression in resource clean-up/refresh when an operation is pending +- Resolves: rhbz#1665816 + +* Tue Jan 8 2019 Ken Gaillot - 1.1.19-8.3 +- Fix regression in crm_resource --refresh +- Order resource moves after remote connection starts +- Resolves: rhbz#1664242 +- Resolves: rhbz#1664243 + * Thu Nov 29 2018 Ken Gaillot - 1.1.19-8.2 - Don't schedule clone notifications behind stopped remote connection - Resolves: rhbz#1654602
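
The scheduler change carried in 012-stonith-ordering.patch (ordering a resource start after the fence action for its own node instead of after a global completion barrier) can be illustrated outside Pacemaker. The sketch below is a minimal, self-contained C toy model; the types and functions are invented for illustration and are not Pacemaker's internal API. It only shows why a per-node ordering prerequisite lets recovery proceed on one node even when fencing of another node fails.

/* Toy model (NOT Pacemaker code): an action is a prerequisite that either
 * completed or did not. A start may run only when every action it is
 * ordered after has completed. */
#include <stdio.h>
#include <stdbool.h>

typedef struct {
    const char *name;   /* label for the prerequisite action */
    bool done;          /* did the prerequisite complete successfully? */
} toy_action_t;

/* Return true only if every ordering prerequisite completed. */
static bool start_is_runnable(const toy_action_t *after, int n_after)
{
    for (int i = 0; i < n_after; i++) {
        if (!after[i].done) {
            return false;   /* one prerequisite blocks the start */
        }
    }
    return true;
}

int main(void)
{
    toy_action_t fence_node1 = { "fence node1", true  };  /* fencing succeeded */
    toy_action_t fence_node2 = { "fence node2", false };  /* fencing failed    */

    /* Old model: the start is ordered after a barrier spanning all fencing. */
    toy_action_t barrier[] = { fence_node1, fence_node2 };
    printf("start ordered after global barrier:      %s\n",
           start_is_runnable(barrier, 2) ? "runs" : "blocked");

    /* New model: the start is ordered only after the fence op for its node. */
    toy_action_t own_node[] = { fence_node1 };
    printf("start ordered after its node's fence op: %s\n",
           start_is_runnable(own_node, 1) ? "runs" : "blocked");

    return 0;
}

In this toy run, the failed fencing of node2 blocks the start under the global-barrier ordering but not under the per-node ordering, which is the recovery improvement the patch describes.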