From 73da74305b69b086f8bc7cae697063e2534a79f4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 22 Nov 2016 16:37:07 -0600 Subject: [PATCH 1/6] Low: pengine: remove unnecessary assert it was made obsolete with 1420ff88 --- lib/pengine/unpack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index a9fbcc0..2ef9343 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1402,6 +1402,10 @@ determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; + /* If there is a node state entry for a (former) Pacemaker Remote node + * but no resource creating that node, the node's connection resource will + * be NULL. Consider it an offline remote node in that case. + */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; @@ -1409,8 +1413,6 @@ determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) container = rsc->container; - CRM_ASSERT(rsc != NULL); - /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id); -- 1.8.3.1 From 5156074d560d85ee84de31b9d1e0bd893999fa4e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 23 Nov 2016 13:40:47 -0600 Subject: [PATCH 2/6] Log: pengine: improve trace messages for Pacemaker Remote nodes --- lib/pengine/unpack.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 2ef9343..a49e108 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -70,10 +70,15 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) /* A guest node is fenced by marking its container as failed */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; + if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { crm_warn("Guest node %s will be fenced (by recovering %s) %s", node->details->uname, rsc->id, reason); - /* node->details->unclean = TRUE; */ + + /* We don't mark the node as unclean, because that would prevent the + * node from running resources. We want to allow it to run resources + * in this transition if the recovery succeeds. + */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } @@ -1415,30 +1420,35 @@ determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { - crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id); + crm_trace("%s node %s presumed ONLINE because connection resource is started", + (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { - crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id); + crm_trace("%s node %s shutting down because connection resource is stopping", + (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { - crm_trace("Remote node %s is set to UNCLEAN. 
rsc failed.", this_node->details->id); + crm_trace("Guest node %s UNCLEAN because guest resource failed", + this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { - crm_trace("Remote node %s is set to OFFLINE. rsc failed.", this_node->details->id); + crm_trace("%s node %s OFFLINE because connection resource failed", + (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { - crm_trace("Remote node %s is set to OFFLINE. node is stopped.", this_node->details->id); + crm_trace("%s node %s OFFLINE because its resource is stopped", + (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } -- 1.8.3.1 From 225d20cacc5643e113d42159fc713071172d88da Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 22 Nov 2016 16:40:52 -0600 Subject: [PATCH 3/6] Fix: pengine: guest node fencing doesn't require stonith enabled Comments elsewhere say as much, but stage6() didn't get the memo --- lib/pengine/utils.c | 13 +++++++++++++ pengine/allocate.c | 5 +---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index cc97db1..6be9bb1 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -36,6 +36,19 @@ void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * contain static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); +/*! + * \internal + * \brief Check whether we can fence a particular node + * + * \param[in] data_set Working set for cluster + * \param[in] node Name of node to check + * + * \return TRUE if node can be fenced, FALSE otherwise + * + * \note This function should only be called for cluster nodes and baremetal + * remote nodes; guest nodes are fenced by stopping their container + * resource, so fence execution requirements do not apply to them. + */ bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { diff --git a/pengine/allocate.c b/pengine/allocate.c index 82abd36..bdf03e5 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1374,10 +1374,7 @@ stage6(pe_working_set_t * data_set) * guest's host. */ if (is_container_remote_node(node)) { - /* Guest */ - if (need_stonith - && node->details->remote_requires_reset - && pe_can_fence(data_set, node)) { + if (node->details->remote_requires_reset && need_stonith) { resource_t *container = node->details->remote_rsc->container; char *key = stop_key(container); GListPtr stop_list = find_actions(container->actions, key, NULL); -- 1.8.3.1 From b11887869723f23a330af8b1b0e9ffd935b68ae0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 23 Mar 2016 17:57:50 -0500 Subject: [PATCH 4/6] Test: pengine: add regression test for when a guest node's host goes away As of this commit, the PE handles this situation badly. Adding the test before the fix allows the changes in behavior to be highlighted. 
--- pengine/regression.sh | 1 + pengine/test10/guest-node-host-dies.dot | 107 ++++++ pengine/test10/guest-node-host-dies.exp | 539 ++++++++++++++++++++++++++++ pengine/test10/guest-node-host-dies.scores | 80 +++++ pengine/test10/guest-node-host-dies.summary | 73 ++++ pengine/test10/guest-node-host-dies.xml | 294 +++++++++++++++ 6 files changed, 1094 insertions(+) create mode 100644 pengine/test10/guest-node-host-dies.dot create mode 100644 pengine/test10/guest-node-host-dies.exp create mode 100644 pengine/test10/guest-node-host-dies.scores create mode 100644 pengine/test10/guest-node-host-dies.summary create mode 100644 pengine/test10/guest-node-host-dies.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index be1734b..1bc8e1e 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -817,6 +817,7 @@ do_test whitebox-unexpectedly-running "Recover container nodes the cluster did n do_test whitebox-migrate1 "Migrate both container and connection resource" do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced" do_test whitebox-nested-group "Verify guest remote-node works nested in a group" +do_test guest-node-host-dies "Verify guest node is recovered if host goes away" echo "" do_test remote-startup-probes "Baremetal remote-node startup probes" diff --git a/pengine/test10/guest-node-host-dies.dot b/pengine/test10/guest-node-host-dies.dot new file mode 100644 index 0000000..01858b3 --- /dev/null +++ b/pengine/test10/guest-node-host-dies.dot @@ -0,0 +1,107 @@ +digraph "g" { +"Fencing_monitor_120000 rhel7-4" [ style=bold color="green" fontcolor="black"] +"Fencing_start_0 rhel7-4" -> "Fencing_monitor_120000 rhel7-4" [ style = bold] +"Fencing_start_0 rhel7-4" [ style=bold color="green" fontcolor="black"] +"Fencing_stop_0 rhel7-4" -> "Fencing_start_0 rhel7-4" [ style = bold] +"Fencing_stop_0 rhel7-4" -> "all_stopped" [ style = bold] +"Fencing_stop_0 rhel7-4" [ style=bold color="green" fontcolor="black"] +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"container1_start_0 rhel7-2" -> "lxc1_start_0 rhel7-2" [ style = bold] +"container1_start_0 rhel7-2" [ style=bold color="green" fontcolor="black"] +"container1_stop_0 rhel7-1" -> "all_stopped" [ style = bold] +"container1_stop_0 rhel7-1" -> "container1_start_0 rhel7-2" [ style = bold] +"container1_stop_0 rhel7-1" [ style=bold color="green" fontcolor="orange"] +"container2_start_0 rhel7-3" -> "lxc2_start_0 rhel7-3" [ style = bold] +"container2_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] +"container2_stop_0 rhel7-1" -> "all_stopped" [ style = bold] +"container2_stop_0 rhel7-1" -> "container2_start_0 rhel7-3" [ style = bold] +"container2_stop_0 rhel7-1" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_demote_0" -> "lxc-ms-master_demoted_0" [ style = bold] +"lxc-ms-master_demote_0" -> "lxc-ms_demote_0 lxc1" [ style = bold] +"lxc-ms-master_demote_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_demoted_0" -> "lxc-ms-master_promote_0" [ style = bold] +"lxc-ms-master_demoted_0" -> "lxc-ms-master_start_0" [ style = bold] +"lxc-ms-master_demoted_0" -> "lxc-ms-master_stop_0" [ style = bold] +"lxc-ms-master_demoted_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_promote_0" -> "lxc-ms_promote_0 lxc1" [ style = bold] +"lxc-ms-master_promote_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_promoted_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_running_0" -> 
"lxc-ms-master_promote_0" [ style = bold] +"lxc-ms-master_running_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_start_0" -> "lxc-ms-master_running_0" [ style = bold] +"lxc-ms-master_start_0" -> "lxc-ms_start_0 lxc1" [ style = bold] +"lxc-ms-master_start_0" -> "lxc-ms_start_0 lxc2" [ style = bold] +"lxc-ms-master_start_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_stop_0" -> "lxc-ms-master_stopped_0" [ style = bold] +"lxc-ms-master_stop_0" -> "lxc-ms_stop_0 lxc1" [ style = bold] +"lxc-ms-master_stop_0" -> "lxc-ms_stop_0 lxc2" [ style = bold] +"lxc-ms-master_stop_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms-master_stopped_0" -> "lxc-ms-master_promote_0" [ style = bold] +"lxc-ms-master_stopped_0" -> "lxc-ms-master_start_0" [ style = bold] +"lxc-ms-master_stopped_0" [ style=bold color="green" fontcolor="orange"] +"lxc-ms_demote_0 lxc1" -> "lxc-ms-master_demoted_0" [ style = bold] +"lxc-ms_demote_0 lxc1" -> "lxc-ms_promote_0 lxc1" [ style = bold] +"lxc-ms_demote_0 lxc1" -> "lxc-ms_stop_0 lxc1" [ style = bold] +"lxc-ms_demote_0 lxc1" [ style=bold color="green" fontcolor="black"] +"lxc-ms_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] +"lxc-ms_promote_0 lxc1" -> "lxc-ms-master_promoted_0" [ style = bold] +"lxc-ms_promote_0 lxc1" [ style=bold color="green" fontcolor="black"] +"lxc-ms_start_0 lxc1" -> "lxc-ms-master_running_0" [ style = bold] +"lxc-ms_start_0 lxc1" -> "lxc-ms_promote_0 lxc1" [ style = bold] +"lxc-ms_start_0 lxc1" [ style=bold color="green" fontcolor="black"] +"lxc-ms_start_0 lxc2" -> "lxc-ms-master_running_0" [ style = bold] +"lxc-ms_start_0 lxc2" -> "lxc-ms_monitor_10000 lxc2" [ style = bold] +"lxc-ms_start_0 lxc2" [ style=bold color="green" fontcolor="black"] +"lxc-ms_stop_0 lxc1" -> "all_stopped" [ style = bold] +"lxc-ms_stop_0 lxc1" -> "lxc-ms-master_stopped_0" [ style = bold] +"lxc-ms_stop_0 lxc1" -> "lxc-ms_start_0 lxc1" [ style = bold] +"lxc-ms_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] +"lxc-ms_stop_0 lxc2" -> "all_stopped" [ style = bold] +"lxc-ms_stop_0 lxc2" -> "lxc-ms-master_stopped_0" [ style = bold] +"lxc-ms_stop_0 lxc2" -> "lxc-ms_start_0 lxc2" [ style = bold] +"lxc-ms_stop_0 lxc2" [ style=bold color="green" fontcolor="orange"] +"lxc1_monitor_30000 rhel7-2" [ style=bold color="green" fontcolor="black"] +"lxc1_start_0 rhel7-2" -> "lxc-ms_promote_0 lxc1" [ style = bold] +"lxc1_start_0 rhel7-2" -> "lxc-ms_start_0 lxc1" [ style = bold] +"lxc1_start_0 rhel7-2" -> "lxc1_monitor_30000 rhel7-2" [ style = bold] +"lxc1_start_0 rhel7-2" [ style=bold color="green" fontcolor="black"] +"lxc1_stop_0 rhel7-1" -> "all_stopped" [ style = bold] +"lxc1_stop_0 rhel7-1" -> "container1_stop_0 rhel7-1" [ style = bold] +"lxc1_stop_0 rhel7-1" -> "lxc1_start_0 rhel7-2" [ style = bold] +"lxc1_stop_0 rhel7-1" [ style=bold color="green" fontcolor="orange"] +"lxc2_monitor_30000 rhel7-3" [ style=bold color="green" fontcolor="black"] +"lxc2_start_0 rhel7-3" -> "lxc-ms_monitor_10000 lxc2" [ style = bold] +"lxc2_start_0 rhel7-3" -> "lxc-ms_start_0 lxc2" [ style = bold] +"lxc2_start_0 rhel7-3" -> "lxc2_monitor_30000 rhel7-3" [ style = bold] +"lxc2_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] +"lxc2_stop_0 rhel7-1" -> "all_stopped" [ style = bold] +"lxc2_stop_0 rhel7-1" -> "container2_stop_0 rhel7-1" [ style = bold] +"lxc2_stop_0 rhel7-1" -> "lxc2_start_0 rhel7-3" [ style = bold] +"lxc2_stop_0 rhel7-1" [ style=bold color="green" fontcolor="orange"] +"rsc_rhel7-1_monitor_5000 rhel7-5" [ style=bold 
color="green" fontcolor="black"] +"rsc_rhel7-1_start_0 rhel7-5" -> "rsc_rhel7-1_monitor_5000 rhel7-5" [ style = bold] +"rsc_rhel7-1_start_0 rhel7-5" [ style=bold color="green" fontcolor="black"] +"rsc_rhel7-1_stop_0 rhel7-1" -> "all_stopped" [ style = bold] +"rsc_rhel7-1_stop_0 rhel7-1" -> "rsc_rhel7-1_start_0 rhel7-5" [ style = bold] +"rsc_rhel7-1_stop_0 rhel7-1" [ style=bold color="green" fontcolor="orange"] +"stonith 'reboot' rhel7-1" -> "container1_stop_0 rhel7-1" [ style = bold] +"stonith 'reboot' rhel7-1" -> "container2_stop_0 rhel7-1" [ style = bold] +"stonith 'reboot' rhel7-1" -> "lxc-ms-master_stop_0" [ style = bold] +"stonith 'reboot' rhel7-1" -> "lxc-ms_stop_0 lxc1" [ style = bold] +"stonith 'reboot' rhel7-1" -> "lxc-ms_stop_0 lxc2" [ style = bold] +"stonith 'reboot' rhel7-1" -> "lxc1_stop_0 rhel7-1" [ style = bold] +"stonith 'reboot' rhel7-1" -> "lxc2_stop_0 rhel7-1" [ style = bold] +"stonith 'reboot' rhel7-1" -> "rsc_rhel7-1_stop_0 rhel7-1" [ style = bold] +"stonith 'reboot' rhel7-1" -> "stonith_complete" [ style = bold] +"stonith 'reboot' rhel7-1" [ style=bold color="green" fontcolor="black"] +"stonith_complete" -> "all_stopped" [ style = bold] +"stonith_complete" -> "container1_start_0 rhel7-2" [ style = bold] +"stonith_complete" -> "container2_start_0 rhel7-3" [ style = bold] +"stonith_complete" -> "lxc-ms_promote_0 lxc1" [ style = bold] +"stonith_complete" -> "lxc-ms_start_0 lxc1" [ style = bold] +"stonith_complete" -> "lxc-ms_start_0 lxc2" [ style = bold] +"stonith_complete" -> "lxc1_start_0 rhel7-2" [ style = bold] +"stonith_complete" -> "lxc2_start_0 rhel7-3" [ style = bold] +"stonith_complete" -> "rsc_rhel7-1_start_0 rhel7-5" [ style = bold] +"stonith_complete" [ style=bold color="green" fontcolor="orange"] +} diff --git a/pengine/test10/guest-node-host-dies.exp b/pengine/test10/guest-node-host-dies.exp new file mode 100644 index 0000000..b3c24be --- /dev/null +++ b/pengine/test10/guest-node-host-dies.exp @@ -0,0 +1,539 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/guest-node-host-dies.scores b/pengine/test10/guest-node-host-dies.scores new file mode 100644 index 0000000..0d7ad3f --- /dev/null +++ b/pengine/test10/guest-node-host-dies.scores @@ -0,0 +1,80 @@ +Allocation scores: +clone_color: lxc-ms-master allocation score on lxc1: INFINITY +clone_color: lxc-ms-master allocation score on lxc2: INFINITY +clone_color: lxc-ms-master allocation score on rhel7-1: 0 +clone_color: lxc-ms-master allocation score on rhel7-2: 0 
+clone_color: lxc-ms-master allocation score on rhel7-3: 0 +clone_color: lxc-ms-master allocation score on rhel7-4: 0 +clone_color: lxc-ms-master allocation score on rhel7-5: 0 +clone_color: lxc-ms:0 allocation score on lxc1: INFINITY +clone_color: lxc-ms:0 allocation score on lxc2: INFINITY +clone_color: lxc-ms:0 allocation score on rhel7-1: 0 +clone_color: lxc-ms:0 allocation score on rhel7-2: 0 +clone_color: lxc-ms:0 allocation score on rhel7-3: 0 +clone_color: lxc-ms:0 allocation score on rhel7-4: 0 +clone_color: lxc-ms:0 allocation score on rhel7-5: 0 +clone_color: lxc-ms:1 allocation score on lxc1: INFINITY +clone_color: lxc-ms:1 allocation score on lxc2: INFINITY +clone_color: lxc-ms:1 allocation score on rhel7-1: 0 +clone_color: lxc-ms:1 allocation score on rhel7-2: 0 +clone_color: lxc-ms:1 allocation score on rhel7-3: 0 +clone_color: lxc-ms:1 allocation score on rhel7-4: 0 +clone_color: lxc-ms:1 allocation score on rhel7-5: 0 +lxc-ms:0 promotion score on lxc1: INFINITY +lxc-ms:1 promotion score on lxc2: INFINITY +native_color: Fencing allocation score on lxc1: -INFINITY +native_color: Fencing allocation score on lxc2: -INFINITY +native_color: Fencing allocation score on rhel7-1: 0 +native_color: Fencing allocation score on rhel7-2: 0 +native_color: Fencing allocation score on rhel7-3: 0 +native_color: Fencing allocation score on rhel7-4: 0 +native_color: Fencing allocation score on rhel7-5: 0 +native_color: container1 allocation score on lxc1: -INFINITY +native_color: container1 allocation score on lxc2: -INFINITY +native_color: container1 allocation score on rhel7-1: -INFINITY +native_color: container1 allocation score on rhel7-2: 0 +native_color: container1 allocation score on rhel7-3: 0 +native_color: container1 allocation score on rhel7-4: 0 +native_color: container1 allocation score on rhel7-5: 0 +native_color: container2 allocation score on lxc1: -INFINITY +native_color: container2 allocation score on lxc2: -INFINITY +native_color: container2 allocation score on rhel7-1: -INFINITY +native_color: container2 allocation score on rhel7-2: 0 +native_color: container2 allocation score on rhel7-3: 0 +native_color: container2 allocation score on rhel7-4: 0 +native_color: container2 allocation score on rhel7-5: 0 +native_color: lxc-ms:0 allocation score on lxc1: INFINITY +native_color: lxc-ms:0 allocation score on lxc2: INFINITY +native_color: lxc-ms:0 allocation score on rhel7-1: -INFINITY +native_color: lxc-ms:0 allocation score on rhel7-2: 0 +native_color: lxc-ms:0 allocation score on rhel7-3: 0 +native_color: lxc-ms:0 allocation score on rhel7-4: 0 +native_color: lxc-ms:0 allocation score on rhel7-5: 0 +native_color: lxc-ms:1 allocation score on lxc1: -INFINITY +native_color: lxc-ms:1 allocation score on lxc2: INFINITY +native_color: lxc-ms:1 allocation score on rhel7-1: -INFINITY +native_color: lxc-ms:1 allocation score on rhel7-2: 0 +native_color: lxc-ms:1 allocation score on rhel7-3: 0 +native_color: lxc-ms:1 allocation score on rhel7-4: 0 +native_color: lxc-ms:1 allocation score on rhel7-5: 0 +native_color: lxc1 allocation score on lxc1: -INFINITY +native_color: lxc1 allocation score on lxc2: -INFINITY +native_color: lxc1 allocation score on rhel7-1: -INFINITY +native_color: lxc1 allocation score on rhel7-2: 0 +native_color: lxc1 allocation score on rhel7-3: -INFINITY +native_color: lxc1 allocation score on rhel7-4: -INFINITY +native_color: lxc1 allocation score on rhel7-5: -INFINITY +native_color: lxc2 allocation score on lxc1: -INFINITY +native_color: lxc2 allocation score 
on lxc2: -INFINITY +native_color: lxc2 allocation score on rhel7-1: -INFINITY +native_color: lxc2 allocation score on rhel7-2: -INFINITY +native_color: lxc2 allocation score on rhel7-3: 0 +native_color: lxc2 allocation score on rhel7-4: -INFINITY +native_color: lxc2 allocation score on rhel7-5: -INFINITY +native_color: rsc_rhel7-1 allocation score on lxc1: -INFINITY +native_color: rsc_rhel7-1 allocation score on lxc2: -INFINITY +native_color: rsc_rhel7-1 allocation score on rhel7-1: 100 +native_color: rsc_rhel7-1 allocation score on rhel7-2: 0 +native_color: rsc_rhel7-1 allocation score on rhel7-3: 0 +native_color: rsc_rhel7-1 allocation score on rhel7-4: 0 +native_color: rsc_rhel7-1 allocation score on rhel7-5: 0 diff --git a/pengine/test10/guest-node-host-dies.summary b/pengine/test10/guest-node-host-dies.summary new file mode 100644 index 0000000..8a1bfd4 --- /dev/null +++ b/pengine/test10/guest-node-host-dies.summary @@ -0,0 +1,73 @@ + +Current cluster status: +Node rhel7-1 (1): UNCLEAN (offline) +Online: [ rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] +Containers: [ lxc1:container1 lxc2:container2 ] + + Fencing (stonith:fence_xvm): Started rhel7-4 + rsc_rhel7-1 (ocf::heartbeat:IPaddr2): Started rhel7-1 ( UNCLEAN ) + container1 (ocf::heartbeat:VirtualDomain): Started rhel7-1 ( UNCLEAN ) + container2 (ocf::heartbeat:VirtualDomain): Started rhel7-1 ( UNCLEAN ) + Master/Slave Set: lxc-ms-master [lxc-ms] + Masters: [ lxc1 ] + Slaves: [ lxc2 ] + +Transition Summary: + * Restart Fencing (Started rhel7-4) + * Move rsc_rhel7-1 (Started rhel7-1 -> rhel7-5) + * Move container1 (Started rhel7-1 -> rhel7-2) + * Move container2 (Started rhel7-1 -> rhel7-3) + * Restart lxc-ms:0 (Master lxc1) + * Restart lxc-ms:1 (Slave lxc2) + * Move lxc1 (Started rhel7-1 -> rhel7-2) + * Move lxc2 (Started rhel7-1 -> rhel7-3) + +Executing cluster transition: + * Resource action: Fencing stop on rhel7-4 + * Resource action: Fencing start on rhel7-4 + * Resource action: Fencing monitor=120000 on rhel7-4 + * Pseudo action: lxc-ms-master_demote_0 + * Fencing rhel7-1 (reboot) + * Pseudo action: stonith_complete + * Pseudo action: rsc_rhel7-1_stop_0 + * Resource action: lxc-ms demote on lxc1 + * Pseudo action: lxc-ms-master_demoted_0 + * Pseudo action: lxc-ms-master_stop_0 + * Pseudo action: lxc1_stop_0 + * Pseudo action: lxc2_stop_0 + * Resource action: rsc_rhel7-1 start on rhel7-5 + * Pseudo action: container1_stop_0 + * Pseudo action: container2_stop_0 + * Pseudo action: lxc-ms_stop_0 + * Pseudo action: lxc-ms_stop_0 + * Pseudo action: lxc-ms-master_stopped_0 + * Pseudo action: lxc-ms-master_start_0 + * Pseudo action: all_stopped + * Resource action: rsc_rhel7-1 monitor=5000 on rhel7-5 + * Resource action: container1 start on rhel7-2 + * Resource action: container2 start on rhel7-3 + * Resource action: lxc1 start on rhel7-2 + * Resource action: lxc2 start on rhel7-3 + * Resource action: lxc-ms start on lxc1 + * Resource action: lxc-ms start on lxc2 + * Resource action: lxc-ms monitor=10000 on lxc2 + * Pseudo action: lxc-ms-master_running_0 + * Resource action: lxc1 monitor=30000 on rhel7-2 + * Resource action: lxc2 monitor=30000 on rhel7-3 + * Pseudo action: lxc-ms-master_promote_0 + * Resource action: lxc-ms promote on lxc1 + * Pseudo action: lxc-ms-master_promoted_0 + +Revised cluster status: +Online: [ rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] +OFFLINE: [ rhel7-1 ] +Containers: [ lxc1:container1 lxc2:container2 ] + + Fencing (stonith:fence_xvm): Started rhel7-4 + rsc_rhel7-1 (ocf::heartbeat:IPaddr2): Started rhel7-5 + container1 
(ocf::heartbeat:VirtualDomain):	Started rhel7-2
+ container2	(ocf::heartbeat:VirtualDomain):	Started rhel7-3
+ Master/Slave Set: lxc-ms-master [lxc-ms]
+     Masters: [ lxc1 ]
+     Slaves: [ lxc2 ]
+
diff --git a/pengine/test10/guest-node-host-dies.xml b/pengine/test10/guest-node-host-dies.xml
new file mode 100644
index 0000000..a840da1
--- /dev/null
+++ b/pengine/test10/guest-node-host-dies.xml
@@ -0,0 +1,294 @@
+<!-- 294 lines of CIB XML input for the test: the tags were stripped when
+     this patch was copied, so the file body is not recoverable here -->
--
1.8.3.1

From beab7718e14a54f1b50d7c5ff4b0086e09332da3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot
Date: Fri, 15 Apr 2016 13:10:17 -0500
Subject: [PATCH 5/6] Fix: pengine: create a pseudo-fence for guest node recovery

If a guest node needs to be recovered, the PE would previously order
actions in relation to the stop action for the guest's container
resource, if one was scheduled. This had problems: for implied stops due
to fencing the guest's host, there would be no stop action, so no
ordering could be done; ordering in relation to the stop action made
stonith_constraints() mistakenly assume that the host node (the node for
the stop action) was the fence target, and thus mark the wrong
stops/demotes as implied; and clone notifications for fence events would
not get called for guest node recoveries, whether explicit or implied.

Now, a fence pseudo-event is created for guest node recovery, regardless
of whether there is an explicit stop action scheduled for the container.
This addresses all those issues, and will allow the crmd to detect
implied stops.

This also allows us to simplify the implied stop/demote detection, since
we will check the pseudo-op for implied actions -- we don't need to check
the real fence op for implied actions on guest nodes.
---
 crmd/te_utils.c    |  8 ++++++
 pengine/allocate.c | 82 ++++++++++++++++++++++++++++++++++++++++++++----------
 pengine/graph.c    | 14 ++++------
 pengine/native.c   | 48 ++------------------------
 4 files changed, 83 insertions(+), 69 deletions(-)

diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 4c708a1..e7bf7ff 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -331,6 +331,14 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
             /* The DC always sends updates */
             send_stonith_update(NULL, st_event->target, uuid);
 
+            /* @TODO Ideally, at this point, we'd check whether the fenced node
+             * hosted any guest nodes, and call remote_node_down() for them.
+             * Unfortunately, the crmd doesn't have a simple, reliable way to
+             * map hosts to guests. It might be possible to track this in the
+             * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
+             * on the PE creating fence pseudo-events for the guests.
+             */
+
             if (st_event->client_origin
                 && safe_str_neq(st_event->client_origin, te_client_id)) {
 
                 /* Abort the current transition graph if it wasn't us
diff --git a/pengine/allocate.c b/pengine/allocate.c
index bdf03e5..74b57fb 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1341,6 +1341,70 @@ any_managed_resources(pe_working_set_t * data_set)
     return FALSE;
 }
 
+/*!
+ * \internal
+ * \brief Create pseudo-op for guest node fence, and order relative to it
+ *
+ * \param[in] node      Guest node to fence
+ * \param[in] done      STONITH_DONE operation
+ * \param[in] data_set  Working set of CIB state
+ */
+static void
+fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set)
+{
+    resource_t *container = node->details->remote_rsc->container;
+    pe_action_t *stop = NULL;
+    pe_action_t *stonith_op = NULL;
+
+    /* The fence action is just a label; we don't do anything differently for
+     * off vs. reboot. We specify it explicitly, rather than let it default to
+     * the cluster's default action, because we are not _initiating_ fencing --
+     * we are creating a pseudo-event to describe fencing that is already
+     * occurring by other means (container recovery).
+     */
+    const char *fence_action = "off";
+
+    /* Check whether the guest's container resource has any explicit stop or
+     * start (the stop may be implied by fencing of the guest's host).
+     */
+    if (container) {
+        stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
+
+        if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
+            fence_action = "reboot";
+        }
+    }
+
+    /* Create a fence pseudo-event, so we have an event to order actions
+     * against, and crmd can always detect it.
+     */
+    stonith_op = pe_fence_op(node, fence_action, FALSE, data_set);
+    update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable);
+
+    /* We want to imply stops/demotes after the guest is stopped, not wait until
+     * it is restarted, so we always order pseudo-fencing after stop, not start
+     * (even though start might be closer to what is done for a real reboot).
+     */
+    if (stop) {
+        order_actions(stop, stonith_op,
+                      pe_order_runnable_left|pe_order_implies_then);
+        crm_info("Implying guest node %s is down (action %d) "
+                 "after container %s is stopped (action %d)",
+                 node->details->uname, stonith_op->id,
+                 container->id, stop->id);
+    } else {
+        crm_info("Implying guest node %s is down (action %d)",
+                 node->details->uname, stonith_op->id);
+    }
+
+    /* @TODO: Order pseudo-fence after any (optional) fence of guest's host */
+
+    /* Order/imply other actions relative to pseudo-fence as with real fence */
+    stonith_constraints(node, stonith_op, data_set);
+    order_actions(stonith_op, done, pe_order_implies_then);
+}
+
 /*
  * Create dependencies for stonith and shutdown operations
  */
@@ -1369,24 +1433,12 @@ stage6(pe_working_set_t * data_set)
     for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
-        /* Guest nodes are "fenced" by recovering their container resource.
-         * The container stop may be explicit, or implied by the fencing of the
-         * guest's host.
+        /* Guest nodes are "fenced" by recovering their container resource,
+         * so handle them separately.
*/ if (is_container_remote_node(node)) { if (node->details->remote_requires_reset && need_stonith) { - resource_t *container = node->details->remote_rsc->container; - char *key = stop_key(container); - GListPtr stop_list = find_actions(container->actions, key, NULL); - - crm_info("Implying node %s is down when container %s is stopped (%p)", - node->details->uname, container->id, stop_list); - if(stop_list) { - stonith_constraints(node, stop_list->data, data_set); - } - - g_list_free(stop_list); - free(key); + fence_guest(node, done, data_set); } continue; } diff --git a/pengine/graph.c b/pengine/graph.c index ee7c7c8..569cf6e 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -715,13 +715,7 @@ stonith_constraints(node_t * node, action_t * stonith_op, pe_working_set_t * dat CRM_CHECK(stonith_op != NULL, return FALSE); for (r = data_set->resources; r != NULL; r = r->next) { - resource_t *rsc = (resource_t *) r->data; - - if ((stonith_op->rsc == NULL) - || ((stonith_op->rsc != rsc) && (stonith_op->rsc != rsc->container))) { - - rsc_stonith_ordering(rsc, stonith_op, data_set); - } + rsc_stonith_ordering((resource_t *) r->data, stonith_op, data_set); } return TRUE; } @@ -888,7 +882,11 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) } if (safe_str_eq(action->task, CRM_OP_FENCE)) { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); + /* All fences need node info; guest node fences are pseudo-events */ + action_xml = create_xml_node(NULL, + is_set(action->flags, pe_action_pseudo)? + XML_GRAPH_TAG_PSEUDO_EVENT : + XML_GRAPH_TAG_CRM_EVENT); } else if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); diff --git a/pengine/native.c b/pengine/native.c index 56a1434..ff4467b 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -2902,48 +2902,6 @@ native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set } } -/* User data to pass to guest node iterator */ -struct action_list_s { - GListPtr search_list; /* list of actions to search */ - GListPtr result_list; /* list of matching actions for this node */ - const char *key; /* action key to match */ -}; - -/*! - * \internal - * \brief Prepend a node's actions matching a key to a list - * - * \param[in] node Guest node - * \param[in/out] data User data - */ -static void prepend_node_actions(const node_t *node, void *data) -{ - GListPtr actions; - struct action_list_s *info = (struct action_list_s *) data; - - actions = find_actions(info->search_list, info->key, node); - info->result_list = g_list_concat(actions, info->result_list); -} - -static GListPtr -find_fence_target_node_actions(GListPtr search_list, const char *key, node_t *fence_target, pe_working_set_t *data_set) -{ - struct action_list_s action_list; - - /* Actions on the target that match the key are implied by the fencing */ - action_list.search_list = search_list; - action_list.result_list = find_actions(search_list, key, fence_target); - action_list.key = key; - - /* - * If the target is a host for any guest nodes, actions on those nodes - * that match the key are also implied by the fencing. 
- */ - pe_foreach_guest_node(data_set, fence_target, prepend_node_actions, &action_list); - - return action_list.result_list; -} - static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { @@ -2963,8 +2921,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_ /* Get a list of stop actions potentially implied by the fencing */ key = stop_key(rsc); - action_list = find_fence_target_node_actions(rsc->actions, key, target, - data_set); + action_list = find_actions(rsc->actions, key, target); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { @@ -3061,8 +3018,7 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_ /* Get a list of demote actions potentially implied by the fencing */ key = demote_key(rsc); - action_list = find_fence_target_node_actions(rsc->actions, key, target, - data_set); + action_list = find_actions(rsc->actions, key, target); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { -- 1.8.3.1 From b7ce740edf3d71fcccead2288bf0ab11037f9672 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 23 Nov 2016 14:56:29 -0600 Subject: [PATCH 6/6] Fix: pengine: consider guest node unclean if its host is unclean --- lib/pengine/unpack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index a49e108..6737273 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1406,6 +1406,7 @@ determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; + pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will @@ -1418,6 +1419,10 @@ determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) container = rsc->container; + if (container && (g_list_length(rsc->running_on) == 1)) { + host = rsc->running_on->data; + } + /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", @@ -1451,6 +1456,13 @@ determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; + + } else if (host && (host->details->online == FALSE) + && host->details->unclean) { + crm_trace("Guest node %s UNCLEAN because host is unclean", + this_node->details->id); + this_node->details->online = FALSE; + this_node->details->remote_requires_reset = TRUE; } remote_online_done: -- 1.8.3.1
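
A note on verifying the series as a whole: with all six patches applied,
replaying the guest-node-host-dies test should show the lxc1/lxc2 fences as
pseudo-events in the transition graph, while the fence of the host rhel7-1
remains a real fence event. A minimal sketch (assuming a built tree, and
assuming the graph XML marks fence actions with operation="stonith", as the
test output here suggests):

    # Replay the test input and save the resulting transition graph XML
    crm_simulate -x pengine/test10/guest-node-host-dies.xml -S -G graph.xml

    # Expect pseudo_event entries for the lxc1/lxc2 fences and a
    # crm_event only for the rhel7-1 reboot
    grep -E '(pseudo_event|crm_event).*stonith' graph.xml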