From b0347f7b8e609420a7055d5fe537cc40ac0d1bb2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot
Date: Fri, 16 Jul 2021 11:08:05 -0500
Subject: [PATCH 1/3] Fix: scheduler: don't schedule probes of unmanaged resources on pending nodes

Previously, custom_action() would set an action's optional and runnable
flags in the same, exclusive if-else sequence. This meant that if an action
should be optional *and* unrunnable, only one of the two would be set.

In particular, if a resource is unmanaged *and* its allocated node is
pending, any probe would be marked optional but not unrunnable, and the
controller could wrongly attempt the probe before the join completed.

Now, optional is checked separately.
---
 lib/pengine/utils.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 5ef742e..965824b 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -541,6 +541,20 @@ custom_action(pe_resource_t * rsc, char *key, const char *task,
                          FALSE, data_set);
     }
 
+    // Make the action optional if its resource is unmanaged
+    if (!pcmk_is_set(action->flags, pe_action_pseudo)
+        && (action->node != NULL)
+        && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
+        && (g_hash_table_lookup(action->meta,
+                                XML_LRM_ATTR_INTERVAL_MS) == NULL)) {
+            pe_rsc_debug(rsc, "%s on %s is optional (%s is unmanaged)",
+                         action->uuid, action->node->details->uname,
+                         action->rsc->id);
+            pe__set_action_flags(action, pe_action_optional);
+            // Leave runnability to the checks below
+    }
+
+    // Make the action runnable or unrunnable as appropriate
     if (pcmk_is_set(action->flags, pe_action_pseudo)) {
         /* leave untouched */
 
@@ -549,14 +563,6 @@ custom_action(pe_resource_t * rsc, char *key, const char *task,
                      action->uuid);
         pe__clear_action_flags(action, pe_action_runnable);
 
-    } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)
-               && g_hash_table_lookup(action->meta,
-                                      XML_LRM_ATTR_INTERVAL_MS) == NULL) {
-        pe_rsc_debug(rsc, "%s on %s is optional (%s is unmanaged)",
-                     action->uuid, action->node->details->uname, rsc->id);
-        pe__set_action_flags(action, pe_action_optional);
-        //pe__clear_action_flags(action, pe_action_runnable);
-
     } else if (!pcmk_is_set(action->flags, pe_action_dc)
                && !(action->node->details->online)
                && (!pe__is_guest_node(action->node)
-- 
1.8.3.1


From 520303b90eb707f5b7a9afa9b106e4a38b90f0f9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot
Date: Wed, 14 Jul 2021 17:18:44 -0500
Subject: [PATCH 2/3] Test: scheduler: update existing tests for probe scheduling change

The new behavior is an improvement. Looking at bundle-probe-order-2 for
example, the bundle's first instance starts with this status:

  * Replica[0]
    * galera (ocf::heartbeat:galera): Stopped (unmanaged)
    * galera-bundle-docker-0 (ocf::heartbeat:docker): Started centos2 (unmanaged)
    * galera-bundle-0 (ocf::pacemaker:remote): Started centos2 (unmanaged)

After the changes, we now schedule recurring monitors for
galera-bundle-docker-0 and galera-bundle-0 on centos2, and a probe of
galera:0 on galera-bundle-0, all of which are possible.
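
For context, a condensed, illustrative sketch of the control-flow change from
patch 1/3 that makes these probes schedulable (not the literal code;
is_unmanaged_probe and node_is_pending stand in for the full conditions in
custom_action()):

    /* Before: one exclusive if/else chain decided both flags, so an
     * unmanaged resource's probe on a pending node was only made optional. */
    if (pcmk_is_set(action->flags, pe_action_pseudo)) {
        /* leave untouched */
    } else if (is_unmanaged_probe) {
        pe__set_action_flags(action, pe_action_optional);
        /* the pending-node branch below is never reached for this probe */
    } else if (node_is_pending) {
        pe__clear_action_flags(action, pe_action_runnable);
    }

    /* After: optional is decided on its own, then runnability is still
     * evaluated, so the same probe becomes optional *and* unrunnable. */
    if (is_unmanaged_probe) {
        pe__set_action_flags(action, pe_action_optional);
    }
    if (pcmk_is_set(action->flags, pe_action_pseudo)) {
        /* leave untouched */
    } else if (node_is_pending) {
        pe__clear_action_flags(action, pe_action_runnable);
    }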
---
 cts/scheduler/dot/bundle-probe-order-2.dot         |  3 ++
 cts/scheduler/dot/bundle-probe-order-3.dot         |  1 +
 cts/scheduler/exp/bundle-probe-order-2.exp         | 33 ++++++++++++++++++++--
 cts/scheduler/exp/bundle-probe-order-3.exp         | 21 ++++++++++----
 cts/scheduler/summary/bundle-probe-order-2.summary |  3 ++
 cts/scheduler/summary/bundle-probe-order-3.summary |  1 +
 6 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/cts/scheduler/dot/bundle-probe-order-2.dot b/cts/scheduler/dot/bundle-probe-order-2.dot
index 0cce3fd..7706195 100644
--- a/cts/scheduler/dot/bundle-probe-order-2.dot
+++ b/cts/scheduler/dot/bundle-probe-order-2.dot
@@ -1,6 +1,9 @@
  digraph "g" {
+"galera-bundle-0_monitor_30000 centos2" [ style=bold color="green" fontcolor="black"]
+"galera-bundle-docker-0_monitor_60000 centos2" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-1_monitor_0 centos2" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-2_monitor_0 centos1" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-2_monitor_0 centos2" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-2_monitor_0 centos3" [ style=bold color="green" fontcolor="black"]
+"galera:0_monitor_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"]
 }
diff --git a/cts/scheduler/dot/bundle-probe-order-3.dot b/cts/scheduler/dot/bundle-probe-order-3.dot
index a4b109f..53a384b 100644
--- a/cts/scheduler/dot/bundle-probe-order-3.dot
+++ b/cts/scheduler/dot/bundle-probe-order-3.dot
@@ -2,6 +2,7 @@
 "galera-bundle-0_monitor_0 centos1" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-0_monitor_0 centos2" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-0_monitor_0 centos3" [ style=bold color="green" fontcolor="black"]
+"galera-bundle-docker-0_monitor_60000 centos2" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-1_monitor_0 centos2" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-2_monitor_0 centos1" [ style=bold color="green" fontcolor="black"]
 "galera-bundle-docker-2_monitor_0 centos2" [ style=bold color="green" fontcolor="black"]
diff --git a/cts/scheduler/exp/bundle-probe-order-2.exp b/cts/scheduler/exp/bundle-probe-order-2.exp
index d6174e7..5b28050 100644
--- a/cts/scheduler/exp/bundle-probe-order-2.exp
+++ b/cts/scheduler/exp/bundle-probe-order-2.exp
@@ -1,6 +1,33 @@
@@ -8,7 +35,7 @@
@@ -17,7 +44,7 @@
@@ -26,7 +53,7 @@
diff --git a/cts/scheduler/exp/bundle-probe-order-3.exp b/cts/scheduler/exp/bundle-probe-order-3.exp
index e1f60e7..69140a4 100644
--- a/cts/scheduler/exp/bundle-probe-order-3.exp
+++ b/cts/scheduler/exp/bundle-probe-order-3.exp
@@ -1,6 +1,15 @@
@@ -8,7 +17,7 @@
@@ -17,7 +26,7 @@
@@ -26,7 +35,7 @@
@@ -35,7 +44,7 @@
@@ -44,7 +53,7 @@
@@ -53,7 +62,7 @@
diff --git a/cts/scheduler/summary/bundle-probe-order-2.summary b/cts/scheduler/summary/bundle-probe-order-2.summary
index 681d607..024c472 100644
--- a/cts/scheduler/summary/bundle-probe-order-2.summary
+++ b/cts/scheduler/summary/bundle-probe-order-2.summary
@@ -13,6 +13,9 @@ Current cluster status:
 Transition Summary:
 
 Executing Cluster Transition:
+  * Resource action: galera:0 monitor on galera-bundle-0
+  * Resource action: galera-bundle-docker-0 monitor=60000 on centos2
+  * Resource action: galera-bundle-0 monitor=30000 on centos2
   * Resource action: galera-bundle-docker-1 monitor on centos2
   * Resource action: galera-bundle-docker-2 monitor on centos3
   * Resource action: galera-bundle-docker-2 monitor on centos2
diff --git a/cts/scheduler/summary/bundle-probe-order-3.summary b/cts/scheduler/summary/bundle-probe-order-3.summary
index f089618..331bd87 100644
--- a/cts/scheduler/summary/bundle-probe-order-3.summary
+++ b/cts/scheduler/summary/bundle-probe-order-3.summary
@@ -12,6 +12,7 @@ Current cluster status:
 Transition Summary:
 
 Executing Cluster Transition:
+  * Resource action: galera-bundle-docker-0 monitor=60000 on centos2
   * Resource action: galera-bundle-0 monitor on centos3
   * Resource action: galera-bundle-0 monitor on centos2
   * Resource action: galera-bundle-0 monitor on centos1
-- 
1.8.3.1


From cb9c294a7ef22916866e0e42e51e88c2b1a61c2e Mon Sep 17 00:00:00 2001
From: Ken Gaillot
Date: Wed, 14 Jul 2021 17:23:11 -0500
Subject: [PATCH 3/3] Test: scheduler: add test for probe of unmanaged resource on pending node

No probes should be scheduled in this case
---
 cts/cts-scheduler.in                             |   1 +
 cts/scheduler/dot/probe-pending-node.dot         |   2 +
 cts/scheduler/exp/probe-pending-node.exp         |   1 +
 cts/scheduler/scores/probe-pending-node.scores   |  61 ++++++
 cts/scheduler/summary/probe-pending-node.summary |  55 +++++
 cts/scheduler/xml/probe-pending-node.xml         | 247 +++++++++++++++++++++++
 6 files changed, 367 insertions(+)
 create mode 100644 cts/scheduler/dot/probe-pending-node.dot
 create mode 100644 cts/scheduler/exp/probe-pending-node.exp
 create mode 100644 cts/scheduler/scores/probe-pending-node.scores
 create mode 100644 cts/scheduler/summary/probe-pending-node.summary
 create mode 100644 cts/scheduler/xml/probe-pending-node.xml

diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index fc9790b..7ba2415 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -110,6 +110,7 @@ TESTS = [
         [ "probe-2", "Correctly re-probe cloned groups" ],
         [ "probe-3", "Probe (pending node)" ],
         [ "probe-4", "Probe (pending node + stopped resource)" ],
+        [ "probe-pending-node", "Probe (pending node + unmanaged resource)" ],
         [ "standby", "Standby" ],
         [ "comments", "Comments" ],
     ],
diff --git a/cts/scheduler/dot/probe-pending-node.dot b/cts/scheduler/dot/probe-pending-node.dot
new file mode 100644
index 0000000..d8f1c9f
--- /dev/null
+++ b/cts/scheduler/dot/probe-pending-node.dot
@@ -0,0 +1,2 @@
+ digraph "g" {
+}
diff --git a/cts/scheduler/exp/probe-pending-node.exp b/cts/scheduler/exp/probe-pending-node.exp
new file mode 100644
index 0000000..56e315f
--- /dev/null
+++ b/cts/scheduler/exp/probe-pending-node.exp
@@ -0,0 +1 @@
diff --git a/cts/scheduler/scores/probe-pending-node.scores b/cts/scheduler/scores/probe-pending-node.scores
new file mode 100644
index 0000000..020a1a0
--- /dev/null
+++ b/cts/scheduler/scores/probe-pending-node.scores
@@ -0,0 +1,61 @@
+
+pcmk__clone_allocate: fs_UC5_SAPMNT-clone allocation score on gcdoubwap01: 0
+pcmk__clone_allocate: fs_UC5_SAPMNT-clone allocation score on gcdoubwap02: 0
+pcmk__clone_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap01: 0
+pcmk__clone_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap02: 0
+pcmk__clone_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap01: 0
+pcmk__clone_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap02: 0
+pcmk__clone_allocate: fs_UC5_SYS-clone allocation score on gcdoubwap01: 0
+pcmk__clone_allocate: fs_UC5_SYS-clone allocation score on gcdoubwap02: 0
+pcmk__clone_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap01: 0
+pcmk__clone_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap02: 0
+pcmk__clone_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap01: 0
+pcmk__clone_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap02: 0
+pcmk__group_allocate: fs_UC5_ascs allocation score on gcdoubwap01: 0
+pcmk__group_allocate: fs_UC5_ascs allocation score on gcdoubwap02: 0
+pcmk__group_allocate: fs_UC5_ers allocation score on gcdoubwap01: 0
+pcmk__group_allocate: fs_UC5_ers allocation score on gcdoubwap02: 0
+pcmk__group_allocate: grp_UC5_ascs allocation score on gcdoubwap01: 0
+pcmk__group_allocate: grp_UC5_ascs allocation score on gcdoubwap02: 0
+pcmk__group_allocate: grp_UC5_ers allocation score on gcdoubwap01: 0
+pcmk__group_allocate: grp_UC5_ers allocation score on gcdoubwap02: 0
+pcmk__group_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap01: 0
+pcmk__group_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap02: 0
+pcmk__group_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap01: 0
+pcmk__group_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap02: 0
+pcmk__group_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap01: INFINITY
+pcmk__group_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap02: 0
+pcmk__group_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap01: 0
+pcmk__group_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap02: 0
+pcmk__group_allocate: rsc_vip_init_ers allocation score on gcdoubwap01: 0
+pcmk__group_allocate: rsc_vip_init_ers allocation score on gcdoubwap02: 0
+pcmk__group_allocate: rsc_vip_int_ascs allocation score on gcdoubwap01: 0
+pcmk__group_allocate: rsc_vip_int_ascs allocation score on gcdoubwap02: 0
+pcmk__native_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap01: 0
+pcmk__native_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap01: 0
+pcmk__native_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap01: 0
+pcmk__native_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap01: 0
+pcmk__native_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: fs_UC5_ascs allocation score on gcdoubwap01: 0
+pcmk__native_allocate: fs_UC5_ascs allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: fs_UC5_ers allocation score on gcdoubwap01: -INFINITY
+pcmk__native_allocate: fs_UC5_ers allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap01: -INFINITY
+pcmk__native_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap01: -INFINITY
+pcmk__native_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap01: -INFINITY
+pcmk__native_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap01: -INFINITY
+pcmk__native_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: rsc_vip_init_ers allocation score on gcdoubwap01: 0
+pcmk__native_allocate: rsc_vip_init_ers allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: rsc_vip_int_ascs allocation score on gcdoubwap01: INFINITY
+pcmk__native_allocate: rsc_vip_int_ascs allocation score on gcdoubwap02: -INFINITY
+pcmk__native_allocate: stonith_gcdoubwap01 allocation score on gcdoubwap01: -INFINITY
+pcmk__native_allocate: stonith_gcdoubwap01 allocation score on gcdoubwap02: 0
+pcmk__native_allocate: stonith_gcdoubwap02 allocation score on gcdoubwap01: 0
+pcmk__native_allocate: stonith_gcdoubwap02 allocation score on gcdoubwap02: -INFINITY
diff --git a/cts/scheduler/summary/probe-pending-node.summary b/cts/scheduler/summary/probe-pending-node.summary
new file mode 100644
index 0000000..208186b
--- /dev/null
+++ b/cts/scheduler/summary/probe-pending-node.summary
@@ -0,0 +1,55 @@
+Using the original execution date of: 2021-06-11 13:55:24Z
+
+              *** Resource management is DISABLED ***
+  The cluster will not attempt to start, stop or recover services
+
+Current cluster status:
+  * Node List:
+    * Node gcdoubwap02: pending
+    * Online: [ gcdoubwap01 ]
+
+  * Full List of Resources:
+    * stonith_gcdoubwap01 (stonith:fence_gce): Stopped (unmanaged)
+    * stonith_gcdoubwap02 (stonith:fence_gce): Stopped (unmanaged)
+    * Clone Set: fs_UC5_SAPMNT-clone [fs_UC5_SAPMNT] (unmanaged):
+      * Stopped: [ gcdoubwap01 gcdoubwap02 ]
+    * Clone Set: fs_UC5_SYS-clone [fs_UC5_SYS] (unmanaged):
+      * Stopped: [ gcdoubwap01 gcdoubwap02 ]
+    * Resource Group: grp_UC5_ascs (unmanaged):
+      * rsc_vip_int_ascs (ocf:heartbeat:IPaddr2): Stopped (unmanaged)
+      * rsc_vip_gcp_ascs (ocf:heartbeat:gcp-vpc-move-vip): Started gcdoubwap01 (unmanaged)
+      * fs_UC5_ascs (ocf:heartbeat:Filesystem): Stopped (unmanaged)
+      * rsc_sap_UC5_ASCS11 (ocf:heartbeat:SAPInstance): Stopped (unmanaged)
+    * Resource Group: grp_UC5_ers (unmanaged):
+      * rsc_vip_init_ers (ocf:heartbeat:IPaddr2): Stopped (unmanaged)
+      * rsc_vip_gcp_ers (ocf:heartbeat:gcp-vpc-move-vip): Stopped (unmanaged)
+      * fs_UC5_ers (ocf:heartbeat:Filesystem): Stopped (unmanaged)
+      * rsc_sap_UC5_ERS12 (ocf:heartbeat:SAPInstance): Stopped (unmanaged)
+
+Transition Summary:
+
+Executing Cluster Transition:
+Using the original execution date of: 2021-06-11 13:55:24Z
+
+Revised Cluster Status:
+  * Node List:
+    * Node gcdoubwap02: pending
+    * Online: [ gcdoubwap01 ]
+
+  * Full List of Resources:
+    * stonith_gcdoubwap01 (stonith:fence_gce): Stopped (unmanaged)
+    * stonith_gcdoubwap02 (stonith:fence_gce): Stopped (unmanaged)
+    * Clone Set: fs_UC5_SAPMNT-clone [fs_UC5_SAPMNT] (unmanaged):
+      * Stopped: [ gcdoubwap01 gcdoubwap02 ]
+    * Clone Set: fs_UC5_SYS-clone [fs_UC5_SYS] (unmanaged):
+      * Stopped: [ gcdoubwap01 gcdoubwap02 ]
+    * Resource Group: grp_UC5_ascs (unmanaged):
+      * rsc_vip_int_ascs (ocf:heartbeat:IPaddr2): Stopped (unmanaged)
+      * rsc_vip_gcp_ascs (ocf:heartbeat:gcp-vpc-move-vip): Started gcdoubwap01 (unmanaged)
+      * fs_UC5_ascs (ocf:heartbeat:Filesystem): Stopped (unmanaged)
+      * rsc_sap_UC5_ASCS11 (ocf:heartbeat:SAPInstance): Stopped (unmanaged)
+    * Resource Group: grp_UC5_ers (unmanaged):
+      * rsc_vip_init_ers (ocf:heartbeat:IPaddr2): Stopped (unmanaged)
+      * rsc_vip_gcp_ers (ocf:heartbeat:gcp-vpc-move-vip): Stopped (unmanaged)
+      * fs_UC5_ers (ocf:heartbeat:Filesystem): Stopped (unmanaged)
+      * rsc_sap_UC5_ERS12 (ocf:heartbeat:SAPInstance): Stopped (unmanaged)
diff --git a/cts/scheduler/xml/probe-pending-node.xml b/cts/scheduler/xml/probe-pending-node.xml
new file mode 100644
index 0000000..9f55c92
--- /dev/null
+++ b/cts/scheduler/xml/probe-pending-node.xml
@@ -0,0 +1,247 @@
-- 
1.8.3.1