bbaaef
From 308df38edabc40221b9ce293e73ea4ac71eb965e Mon Sep 17 00:00:00 2001
bbaaef
From: Numan Siddique <numans@ovn.org>
bbaaef
Date: Tue, 5 Nov 2019 23:11:56 +0530
bbaaef
Subject: [PATCH ovn] Fix ha chassis failover issues for stale ha chassis
bbaaef
 entries
bbaaef
bbaaef
If ha chassis rows of an HA chassis group become stale, i.e. the HA_Chassis.chassis
bbaaef
column is empty (because ovn-controller is not running in that chassis)
bbaaef
for all rows except one, and ha_chassis_group_is_active()
bbaaef
is called on that ovn-controller, then it returns false. Ideally it should
bbaaef
become active since it's the only active chassis. This patch fixes this issue.
bbaaef
bbaaef
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1762777
bbaaef
Reported-by: Daniel Alvarez <dalvarez@redhat.com>
bbaaef
bbaaef
Acked-by: Dumitru Ceara <dceara@redhat.com>
bbaaef
Signed-off-by: Numan Siddique <numans@ovn.org>
bbaaef
---
bbaaef
 ovn/controller/ha-chassis.c | 25 +++++++++++++++++++++++++
bbaaef
 tests/ovn.at                | 20 +++++++++++++++++++-
bbaaef
 2 files changed, 44 insertions(+), 1 deletion(-)
bbaaef
bbaaef
diff --git a/ovn/controller/ha-chassis.c b/ovn/controller/ha-chassis.c
bbaaef
index 6d9426a..d6ec7b6 100644
bbaaef
--- a/ovn/controller/ha-chassis.c
bbaaef
+++ b/ovn/controller/ha-chassis.c
bbaaef
@@ -142,6 +142,27 @@ ha_chassis_destroy_ordered(struct ha_chassis_ordered *ordered_ha_ch)
bbaaef
     }
bbaaef
 }
bbaaef
 
bbaaef
+/* Returns true if there is only one active ha chassis in the chassis group
bbaaef
+ * (i.e. HA_Chassis.chassis column is set) and that active ha chassis is
bbaaef
+ * the local chassis.
bbaaef
+ * Returns false otherwise. */
bbaaef
+static bool
bbaaef
+is_local_chassis_only_candidate(const struct sbrec_ha_chassis_group *ha_ch_grp,
bbaaef
+                                const struct sbrec_chassis *local_chassis)
bbaaef
+{
bbaaef
+    size_t n_active_ha_chassis = 0;
bbaaef
+    bool local_chassis_present = false;
bbaaef
+    for (size_t i = 0; i < ha_ch_grp->n_ha_chassis; i++) {
bbaaef
+        if (ha_ch_grp->ha_chassis[i]->chassis) {
bbaaef
+            n_active_ha_chassis++;
bbaaef
+            if (ha_ch_grp->ha_chassis[i]->chassis == local_chassis) {
bbaaef
+                local_chassis_present = true;
bbaaef
+            }
bbaaef
+        }
bbaaef
+    }
bbaaef
+
bbaaef
+    return (local_chassis_present && n_active_ha_chassis == 1);
bbaaef
+}
bbaaef
 
bbaaef
 /* Returns true if the local_chassis is the master of
bbaaef
  * the HA chassis group, false otherwise. */
bbaaef
@@ -159,6 +180,10 @@ ha_chassis_group_is_active(
bbaaef
         return (ha_ch_grp->ha_chassis[0]->chassis == local_chassis);
bbaaef
     }
bbaaef
 
bbaaef
+    if (is_local_chassis_only_candidate(ha_ch_grp, local_chassis)) {
bbaaef
+        return true;
bbaaef
+    }
bbaaef
+
bbaaef
     if (sset_is_empty(active_tunnels)) {
bbaaef
         /* If active tunnel sset is empty, it means it has lost
bbaaef
          * connectivity with other chassis. */
bbaaef
diff --git a/tests/ovn.at b/tests/ovn.at
bbaaef
index 410f4b5..cb7903d 100644
bbaaef
--- a/tests/ovn.at
bbaaef
+++ b/tests/ovn.at
bbaaef
@@ -13413,7 +13413,25 @@ OVS_WAIT_UNTIL(
bbaaef
 logical_port=ls1-lp_ext1`
bbaaef
     test "$chassis" = "$hv1_uuid"])
bbaaef
 
bbaaef
-OVN_CLEANUP([hv1],[hv2],[hv3])
bbaaef
+# Stop ovn-controllers on hv1 and hv3.
bbaaef
+as hv1 ovs-appctl -t ovn-controller exit
bbaaef
+as hv3 ovs-appctl -t ovn-controller exit
bbaaef
+
bbaaef
+# hv2 should be master and claim ls1-lp_ext1
bbaaef
+OVS_WAIT_UNTIL(
bbaaef
+    [chassis=`ovn-sbctl --bare --columns chassis find port_binding \
bbaaef
+logical_port=ls1-lp_ext1`
bbaaef
+    test "$chassis" = "$hv2_uuid"])
bbaaef
+
bbaaef
+as hv1
bbaaef
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
bbaaef
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
bbaaef
+
bbaaef
+as hv3
bbaaef
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
bbaaef
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
bbaaef
+
bbaaef
+OVN_CLEANUP([hv2])
bbaaef
 AT_CLEANUP
bbaaef
 
bbaaef
 AT_SETUP([ovn -- Address Set Incremental Processing])
bbaaef
-- 
bbaaef
1.8.3.1
bbaaef