Blob Blame History Raw
From 00fed62ed4899412f65644e6b84e49fbf89d09bb Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 2 Aug 2017 19:18:25 -0500
Subject: [PATCH] Fix: pengine: avoid fence loop for remote nodes

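Comment out the fencing of offline remote nodes whose connection is
unrecoverable in probe_resources(), because it results in a fence loop.
This leaves the original issue unaddressed, for investigation later.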
---
 pengine/allocate.c                            |  3 +++
 pengine/test10/remote-fence-unclean-3.dot     |  5 -----
 pengine/test10/remote-fence-unclean-3.exp     | 29 ---------------------------
 pengine/test10/remote-fence-unclean-3.summary |  3 ---
 4 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/pengine/allocate.c b/pengine/allocate.c
index 3a883ad..e3cb4cc 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -905,10 +905,13 @@ probe_resources(pe_working_set_t * data_set)
             continue;
 
         } else if (node->details->online == FALSE && node->details->remote_rsc) {
+            // TODO: figure out why fencing the remote node here results in a fence loop
+            /*
             enum remote_connection_state state = get_remote_node_state(node);
             if(state == remote_state_failed) {
                 pe_fence_node(data_set, node, "the connection is unrecoverable");
             }
+            */
             continue;
 
         } else if(node->details->online == FALSE) {
diff --git a/pengine/test10/remote-fence-unclean-3.dot b/pengine/test10/remote-fence-unclean-3.dot
index b32b77e..14adaef 100644
--- a/pengine/test10/remote-fence-unclean-3.dot
+++ b/pengine/test10/remote-fence-unclean-3.dot
@@ -1,5 +1,4 @@
 digraph "g" {
-"all_stopped" -> "fence1_start_0 overcloud-controller-0" [ style = bold]
 "all_stopped" [ style=bold color="green" fontcolor="orange"]
 "fence1_monitor_0 overcloud-controller-0" -> "fence1_start_0 overcloud-controller-0" [ style = bold]
 "fence1_monitor_0 overcloud-controller-0" [ style=bold color="green" fontcolor="black"]
@@ -12,8 +11,4 @@ digraph "g" {
 "fence1_start_0 overcloud-controller-0" [ style=bold color="green" fontcolor="black"]
 "overcloud-novacompute-0_stop_0 overcloud-controller-0" -> "all_stopped" [ style = bold]
 "overcloud-novacompute-0_stop_0 overcloud-controller-0" [ style=bold color="green" fontcolor="black"]
-"stonith 'reboot' overcloud-novacompute-0" -> "stonith_complete" [ style = bold]
-"stonith 'reboot' overcloud-novacompute-0" [ style=bold color="green" fontcolor="black"]
-"stonith_complete" -> "all_stopped" [ style = bold]
-"stonith_complete" [ style=bold color="green" fontcolor="orange"]
 }
diff --git a/pengine/test10/remote-fence-unclean-3.exp b/pengine/test10/remote-fence-unclean-3.exp
index 2e341bd..64e5a62 100644
--- a/pengine/test10/remote-fence-unclean-3.exp
+++ b/pengine/test10/remote-fence-unclean-3.exp
@@ -21,9 +21,6 @@
     </action_set>
     <inputs>
       <trigger>
-        <pseudo_event id="42" operation="all_stopped" operation_key="all_stopped"/>
-      </trigger>
-      <trigger>
         <rsc_op id="43" operation="monitor" operation_key="fence1_monitor_0" on_node="overcloud-controller-0" on_node_uuid="1"/>
       </trigger>
       <trigger>
@@ -75,29 +72,6 @@
   </synapse>
   <synapse id="6">
     <action_set>
-      <pseudo_event id="209" operation="stonith_complete" operation_key="stonith_complete">
-        <attributes />
-      </pseudo_event>
-    </action_set>
-    <inputs>
-      <trigger>
-        <crm_event id="46" operation="stonith" operation_key="stonith-overcloud-novacompute-0-reboot" on_node="overcloud-novacompute-0" on_node_uuid="overcloud-novacompute-0"/>
-      </trigger>
-    </inputs>
-  </synapse>
-  <synapse id="7">
-    <action_set>
-      <crm_event id="46" operation="stonith" operation_key="stonith-overcloud-novacompute-0-reboot" on_node="overcloud-novacompute-0" on_node_uuid="overcloud-novacompute-0">
-        <attributes CRM_meta_compute_role="true" CRM_meta_on_node="overcloud-novacompute-0" CRM_meta_on_node_uuid="overcloud-novacompute-0" CRM_meta_stonith_action="reboot" />
-        <downed>
-          <node id="overcloud-novacompute-0"/>
-        </downed>
-      </crm_event>
-    </action_set>
-    <inputs/>
-  </synapse>
-  <synapse id="8">
-    <action_set>
       <pseudo_event id="42" operation="all_stopped" operation_key="all_stopped">
         <attributes />
       </pseudo_event>
@@ -106,9 +80,6 @@
       <trigger>
         <rsc_op id="30" operation="stop" operation_key="overcloud-novacompute-0_stop_0" on_node="overcloud-controller-0" on_node_uuid="1"/>
       </trigger>
-      <trigger>
-        <pseudo_event id="209" operation="stonith_complete" operation_key="stonith_complete"/>
-      </trigger>
     </inputs>
   </synapse>
 </transition_graph>
diff --git a/pengine/test10/remote-fence-unclean-3.summary b/pengine/test10/remote-fence-unclean-3.summary
index ec24500..ec54d8e 100644
--- a/pengine/test10/remote-fence-unclean-3.summary
+++ b/pengine/test10/remote-fence-unclean-3.summary
@@ -34,7 +34,6 @@ Containers: [ galera-bundle-0:galera-bundle-docker-0 galera-bundle-1:galera-bund
    openstack-cinder-backup-docker-0	(ocf::heartbeat:docker):	Started overcloud-controller-1
 
 Transition Summary:
- * Fence (reboot) overcloud-novacompute-0 'the connection is unrecoverable'
  * Start   fence1	(overcloud-controller-0)
  * Stop    overcloud-novacompute-0	(overcloud-controller-0)
 
@@ -43,8 +42,6 @@ Executing cluster transition:
  * Resource action: fence1          monitor on overcloud-controller-1
  * Resource action: fence1          monitor on overcloud-controller-0
  * Resource action: overcloud-novacompute-0 stop on overcloud-controller-0
- * Fencing overcloud-novacompute-0 (reboot)
- * Pseudo action:   stonith_complete
  * Pseudo action:   all_stopped
  * Resource action: fence1          start on overcloud-controller-0
  * Resource action: fence1          monitor=60000 on overcloud-controller-0
-- 
1.8.3.1