From d14fb0110208f270491c696bea0072300db2a947 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Mar 2019 12:37:13 -0500 Subject: [PATCH 1/6] Refactor: scheduler: functionize handling of restart ordering for readability --- pengine/native.c | 97 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 37 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index 2f8c011..653a93a 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1942,6 +1942,65 @@ native_action_flags(action_t * action, node_t * node) return action->flags; } +static inline bool +is_primitive_action(pe_action_t *action) +{ + return action && action->rsc && (action->rsc->variant == pe_native); +} + +/*! + * \internal + * \brief Set action bits appropriately when pe_restart_order is used + * + * \param[in] first 'First' action in an ordering with pe_restart_order + * \param[in] then 'Then' action in an ordering with pe_restart_order + * \param[in] filter What ordering flags to care about + * + * \note pe_restart_order is set for "stop resource before starting it" and + * "stop later group member before stopping earlier group member" + */ +static void +handle_restart_ordering(pe_action_t *first, pe_action_t *then, + enum pe_action_flags filter) +{ + const char *reason = NULL; + + CRM_ASSERT(is_primitive_action(first)); + CRM_ASSERT(is_primitive_action(then)); + + if ((filter & pe_action_runnable) + && (then->flags & pe_action_runnable) == 0 + && (then->rsc->flags & pe_rsc_managed)) { + reason = "shutdown"; + } + + if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { + reason = "recover"; + } + + if (reason && is_set(first->flags, pe_action_optional)) { + if (is_set(first->flags, pe_action_runnable) + || is_not_set(then->flags, pe_action_optional)) { + pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); + pe_action_implies(first, then, pe_action_optional); + } + } + + if (reason && is_not_set(first->flags, pe_action_optional) + && is_not_set(first->flags, pe_action_runnable)) { + pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); + pe_action_implies(then, first, pe_action_runnable); + } + + if (reason && + is_not_set(first->flags, pe_action_optional) && + is_set(first->flags, pe_action_migrate_runnable) && + is_not_set(then->flags, pe_action_migrate_runnable)) { + + pe_action_implies(first, then, pe_action_migrate_runnable); + } +} + enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) @@ -2069,43 +2128,7 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_ } if (is_set(type, pe_order_restart)) { - const char *reason = NULL; - - CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); - CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); - - if ((filter & pe_action_runnable) - && (then->flags & pe_action_runnable) == 0 - && (then->rsc->flags & pe_rsc_managed)) { - reason = "shutdown"; - } - - if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { - reason = "recover"; - } - - if (reason && is_set(first->flags, pe_action_optional)) { - if (is_set(first->flags, pe_action_runnable) - || is_not_set(then->flags, pe_action_optional)) { - pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); - pe_action_implies(first, then, pe_action_optional); - } - } - - if (reason && is_not_set(first->flags, pe_action_optional) - && is_not_set(first->flags, pe_action_runnable)) { - pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); - pe_action_implies(then, first, pe_action_runnable); - } - - if (reason && - is_not_set(first->flags, pe_action_optional) && - is_set(first->flags, pe_action_migrate_runnable) && - is_not_set(then->flags, pe_action_migrate_runnable)) { - - pe_action_implies(first, then, pe_action_migrate_runnable); - } - + handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { -- 1.8.3.1 From dda6a0eb480c8a3079b5c15176e27ed62f98a6ac Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Mar 2019 17:46:44 -0500 Subject: [PATCH 2/6] Refactor: scheduler: use is_set()/is_not_set() in restart ordering instead of direct bit comparisons, for readability and consistency --- pengine/native.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index 653a93a..f43d3cf 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1968,13 +1968,14 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); - if ((filter & pe_action_runnable) - && (then->flags & pe_action_runnable) == 0 - && (then->rsc->flags & pe_rsc_managed)) { + if (is_set(filter, pe_action_runnable) + && is_not_set(then->flags, pe_action_runnable) + && is_set(then->rsc->flags, pe_rsc_managed)) { reason = "shutdown"; } - if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { + if (is_set(filter, pe_action_optional) + && is_not_set(then->flags, pe_action_optional)) { reason = "recover"; } -- 1.8.3.1 From 69951375e4af9d1d1152980afddc94bb5881af5a Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Mar 2019 17:49:10 -0500 Subject: [PATCH 3/6] Log: scheduler: improve restart ordering trace logs --- pengine/native.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index f43d3cf..dfcc910 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1968,33 +1968,41 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); + // We need to update the action in two cases: + + // ... if 'then' is required + if (is_set(filter, pe_action_optional) + && is_not_set(then->flags, pe_action_optional)) { + reason = "restart"; + } + + // ... if 'then' is managed but unrunnable if (is_set(filter, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable) && is_set(then->rsc->flags, pe_rsc_managed)) { - reason = "shutdown"; + reason = "stop"; } - if (is_set(filter, pe_action_optional) - && is_not_set(then->flags, pe_action_optional)) { - reason = "recover"; + if (reason == NULL) { + return; } - if (reason && is_set(first->flags, pe_action_optional)) { + pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", + first->uuid, then->uuid, reason); + + if (is_set(first->flags, pe_action_optional)) { if (is_set(first->flags, pe_action_runnable) || is_not_set(then->flags, pe_action_optional)) { - pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_action_implies(first, then, pe_action_optional); } } - if (reason && is_not_set(first->flags, pe_action_optional) + if (is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { - pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_action_implies(then, first, pe_action_runnable); } - if (reason && - is_not_set(first->flags, pe_action_optional) && + if (is_not_set(first->flags, pe_action_optional) && is_set(first->flags, pe_action_migrate_runnable) && is_not_set(then->flags, pe_action_migrate_runnable)) { -- 1.8.3.1 From 32da90e58a89d7f9f3cd6d1e3f961c24b646d734 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Mar 2019 17:50:07 -0500 Subject: [PATCH 4/6] Refactor: scheduler: simplify handling of restart ordering Don't condition pe_action_implies() on the desired state not being already present, because pe_action_implies() handles that. Don't condition clearing first's pe_action_migrate_runnable on first being required, because if it's optional it doesn't matter, and if (now or in the future) optional can be changed to required later, it will actually be important. --- pengine/native.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index dfcc910..8912aa1 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1980,6 +1980,7 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, if (is_set(filter, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable) && is_set(then->rsc->flags, pe_rsc_managed)) { + // If a resource should restart but can't start, we still want to stop reason = "stop"; } @@ -1990,24 +1991,26 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); - if (is_set(first->flags, pe_action_optional)) { - if (is_set(first->flags, pe_action_runnable) - || is_not_set(then->flags, pe_action_optional)) { - pe_action_implies(first, then, pe_action_optional); - } + // Make 'first' required if it is runnable + if (is_set(first->flags, pe_action_runnable)) { + pe_action_implies(first, then, pe_action_optional); } - if (is_not_set(first->flags, pe_action_optional) - && is_not_set(first->flags, pe_action_runnable)) { - pe_action_implies(then, first, pe_action_runnable); + // Make 'first' required if 'then' is required + if (is_not_set(then->flags, pe_action_optional)) { + pe_action_implies(first, then, pe_action_optional); } - if (is_not_set(first->flags, pe_action_optional) && - is_set(first->flags, pe_action_migrate_runnable) && - is_not_set(then->flags, pe_action_migrate_runnable)) { - + // Make 'first' unmigratable if 'then' is unmigratable + if (is_not_set(then->flags, pe_action_migrate_runnable)) { pe_action_implies(first, then, pe_action_migrate_runnable); } + + // Make 'then' unrunnable if 'first' is required but unrunnable + if (is_not_set(first->flags, pe_action_optional) + && is_not_set(first->flags, pe_action_runnable)) { + pe_action_implies(then, first, pe_action_runnable); + } } enum pe_graph_flags -- 1.8.3.1 From 8cfe743d4373fad6b4e50ee64894a16f7f24afa1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Mar 2019 19:11:25 -0500 Subject: [PATCH 5/6] Fix: scheduler: one group stop shouldn't make another required 1.1.7's 8d2f237d reused pe_order_restart ("stop resource before stopping it") for "stop later group member before stopping earlier group member". pe_order_restart includes a check for an unrunnable 'then', because in a restart, even if the start is unrunnable, we still want to perform the stop. However this check does not make sense for group stop ordering, and as of 1.1.10, this caused a regression where a group member could be unnecessarily stopped. Example scenario: if a resource is ordered after a group member, and the resource failed with on-fail=block, that would make the group member's (optional) stop blocked as well, and that blocked stop would unnecessarily make stops of later group members required. This commit fixes the issue by only applying the check when the 'then' action is a start. (RHBZ#1609453) --- pengine/native.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index 8912aa1..747cb10 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1976,11 +1976,13 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, reason = "restart"; } - // ... if 'then' is managed but unrunnable + /* ... if 'then' is unrunnable start of managed resource (if a resource + * should restart but can't start, we still want to stop) + */ if (is_set(filter, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable) - && is_set(then->rsc->flags, pe_rsc_managed)) { - // If a resource should restart but can't start, we still want to stop + && is_set(then->rsc->flags, pe_rsc_managed) + && safe_str_eq(then->task, RSC_START)) { reason = "stop"; } -- 1.8.3.1 From b8e388eff56143632ef848d52eddad9560aad2cf Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Mar 2019 19:44:39 -0500 Subject: [PATCH 6/6] Test: scheduler: one group stop shouldn't make another required --- pengine/regression.sh | 1 + pengine/test10/group-stop-ordering.dot | 2 + pengine/test10/group-stop-ordering.exp | 1 + pengine/test10/group-stop-ordering.scores | 17 ++++ pengine/test10/group-stop-ordering.summary | 25 ++++++ pengine/test10/group-stop-ordering.xml | 132 +++++++++++++++++++++++++++++ 6 files changed, 178 insertions(+) create mode 100644 pengine/test10/group-stop-ordering.dot create mode 100644 pengine/test10/group-stop-ordering.exp create mode 100644 pengine/test10/group-stop-ordering.scores create mode 100644 pengine/test10/group-stop-ordering.summary create mode 100644 pengine/test10/group-stop-ordering.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index e25990d..504de46 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -68,6 +68,7 @@ do_test group-fail "Ensure stop order is preserved for partially active groups" do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" do_test group-dependents "Account for the location preferences of things colocated with a group" +do_test group-stop-ordering "Ensure blocked group member stop does not force other member stops" echo "" do_test rsc_dep1 "Must not " diff --git a/pengine/test10/group-stop-ordering.dot b/pengine/test10/group-stop-ordering.dot new file mode 100644 index 0000000..4b30191 --- /dev/null +++ b/pengine/test10/group-stop-ordering.dot @@ -0,0 +1,2 @@ +digraph "g" { +} diff --git a/pengine/test10/group-stop-ordering.exp b/pengine/test10/group-stop-ordering.exp new file mode 100644 index 0000000..56e315f --- /dev/null +++ b/pengine/test10/group-stop-ordering.exp @@ -0,0 +1 @@ + diff --git a/pengine/test10/group-stop-ordering.scores b/pengine/test10/group-stop-ordering.scores new file mode 100644 index 0000000..5f144d2 --- /dev/null +++ b/pengine/test10/group-stop-ordering.scores @@ -0,0 +1,17 @@ +Allocation scores: +group_color: grp allocation score on fastvm-rhel-7-5-73: 0 +group_color: grp allocation score on fastvm-rhel-7-5-74: 0 +group_color: inside_resource_2 allocation score on fastvm-rhel-7-5-73: 0 +group_color: inside_resource_2 allocation score on fastvm-rhel-7-5-74: 0 +group_color: inside_resource_3 allocation score on fastvm-rhel-7-5-73: 0 +group_color: inside_resource_3 allocation score on fastvm-rhel-7-5-74: 0 +native_color: fence-fastvm-rhel-7-5-73 allocation score on fastvm-rhel-7-5-73: -INFINITY +native_color: fence-fastvm-rhel-7-5-73 allocation score on fastvm-rhel-7-5-74: 0 +native_color: fence-fastvm-rhel-7-5-74 allocation score on fastvm-rhel-7-5-73: 0 +native_color: fence-fastvm-rhel-7-5-74 allocation score on fastvm-rhel-7-5-74: -INFINITY +native_color: inside_resource_2 allocation score on fastvm-rhel-7-5-73: 0 +native_color: inside_resource_2 allocation score on fastvm-rhel-7-5-74: 0 +native_color: inside_resource_3 allocation score on fastvm-rhel-7-5-73: -INFINITY +native_color: inside_resource_3 allocation score on fastvm-rhel-7-5-74: 0 +native_color: outside_resource allocation score on fastvm-rhel-7-5-73: INFINITY +native_color: outside_resource allocation score on fastvm-rhel-7-5-74: 0 diff --git a/pengine/test10/group-stop-ordering.summary b/pengine/test10/group-stop-ordering.summary new file mode 100644 index 0000000..0ec8eb6 --- /dev/null +++ b/pengine/test10/group-stop-ordering.summary @@ -0,0 +1,25 @@ + +Current cluster status: +Online: [ fastvm-rhel-7-5-73 fastvm-rhel-7-5-74 ] + + fence-fastvm-rhel-7-5-73 (stonith:fence_xvm): Started fastvm-rhel-7-5-74 + fence-fastvm-rhel-7-5-74 (stonith:fence_xvm): Started fastvm-rhel-7-5-73 + outside_resource (ocf::pacemaker:Dummy): FAILED fastvm-rhel-7-5-73 (blocked) + Resource Group: grp + inside_resource_2 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 + inside_resource_3 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 + +Transition Summary: + +Executing cluster transition: + +Revised cluster status: +Online: [ fastvm-rhel-7-5-73 fastvm-rhel-7-5-74 ] + + fence-fastvm-rhel-7-5-73 (stonith:fence_xvm): Started fastvm-rhel-7-5-74 + fence-fastvm-rhel-7-5-74 (stonith:fence_xvm): Started fastvm-rhel-7-5-73 + outside_resource (ocf::pacemaker:Dummy): FAILED fastvm-rhel-7-5-73 (blocked) + Resource Group: grp + inside_resource_2 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 + inside_resource_3 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 + diff --git a/pengine/test10/group-stop-ordering.xml b/pengine/test10/group-stop-ordering.xml new file mode 100644 index 0000000..8439c1f --- /dev/null +++ b/pengine/test10/group-stop-ordering.xml @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 1.8.3.1