From 308df38edabc40221b9ce293e73ea4ac71eb965e Mon Sep 17 00:00:00 2001
From: Numan Siddique <numans@ovn.org>
Date: Tue, 5 Nov 2019 23:11:56 +0530
Subject: [PATCH ovn] Fix ha chassis failover issues for stale ha chassis
entries
If all but one of the HA chassis rows of an HA chassis group become stale, i.e.
the HA_Chassis.chassis column is empty (because ovn-controller is not running
on that chassis), and ha_chassis_group_is_active() is called on the
ovn-controller of the remaining chassis, then it returns false. Ideally that
chassis should become active since it's the only active chassis. This patch fixes this issue.
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1762777
Reported-by: Daniel Alvarez <dalvarez@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
---
ovn/controller/ha-chassis.c | 25 +++++++++++++++++++++++++
tests/ovn.at | 20 +++++++++++++++++++-
2 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/ovn/controller/ha-chassis.c b/ovn/controller/ha-chassis.c
index 6d9426a..d6ec7b6 100644
--- a/ovn/controller/ha-chassis.c
+++ b/ovn/controller/ha-chassis.c
@@ -142,6 +142,27 @@ ha_chassis_destroy_ordered(struct ha_chassis_ordered *ordered_ha_ch)
}
}
+/* Returns true if exactly one HA chassis row in 'ha_ch_grp' has its
+ * HA_Chassis.chassis column set (i.e. the row is not stale) and that row
+ * refers to 'local_chassis'.
+ * Returns false otherwise, including when no row has its chassis set. */
+static bool
+is_local_chassis_only_candidate(const struct sbrec_ha_chassis_group *ha_ch_grp,
+ const struct sbrec_chassis *local_chassis)
+{
+ size_t n_active_ha_chassis = 0;
+ bool local_chassis_present = false;
+ for (size_t i = 0; i < ha_ch_grp->n_ha_chassis; i++) {
+ if (ha_ch_grp->ha_chassis[i]->chassis) { /* Skip stale rows. */
+ n_active_ha_chassis++;
+ if (ha_ch_grp->ha_chassis[i]->chassis == local_chassis) {
+ local_chassis_present = true;
+ }
+ }
+ }
+
+ return (local_chassis_present && n_active_ha_chassis == 1);
+}
/* Returns true if the local_chassis is the master of
* the HA chassis group, false otherwise. */
@@ -159,6 +180,10 @@ ha_chassis_group_is_active(
return (ha_ch_grp->ha_chassis[0]->chassis == local_chassis);
}
+ if (is_local_chassis_only_candidate(ha_ch_grp, local_chassis)) {
+ return true;
+ }
+
if (sset_is_empty(active_tunnels)) {
/* If active tunnel sset is empty, it means it has lost
* connectivity with other chassis. */
diff --git a/tests/ovn.at b/tests/ovn.at
index 410f4b5..cb7903d 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -13413,7 +13413,25 @@ OVS_WAIT_UNTIL(
logical_port=ls1-lp_ext1`
test "$chassis" = "$hv1_uuid"])
-OVN_CLEANUP([hv1],[hv2],[hv3])
+# Stop ovn-controllers on hv1 and hv3.
+as hv1 ovs-appctl -t ovn-controller exit
+as hv3 ovs-appctl -t ovn-controller exit
+
+# hv2 should be master and claim ls1-lp_ext1
+OVS_WAIT_UNTIL(
+ [chassis=`ovn-sbctl --bare --columns chassis find port_binding \
+logical_port=ls1-lp_ext1`
+ test "$chassis" = "$hv2_uuid"])
+
+as hv1
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as hv3
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+OVN_CLEANUP([hv2])
AT_CLEANUP
AT_SETUP([ovn -- Address Set Incremental Processing])
--
1.8.3.1