Blob Blame History Raw
From 536d6aa32497ab17e12767446f294fc8467cfc7c Mon Sep 17 00:00:00 2001
From: Dumitru Ceara <dceara@redhat.com>
Date: Thu, 3 Sep 2020 17:03:45 +0200
Subject: [PATCH 1/2] chassis: Fix the way encaps are updated for a chassis
 record.

ovn-controller always stores the last configured system-id/chassis-id in
memory regardless if the connection to the SB is up or down. This is OK
as long as the change can be committed successfully when the SB DB
connection comes back up.

Without this change, if the chassis-id changes while the SB connection is
down, ovn-controller will fail to create the new record but nevertheless
update its in-memory chassis-id. When the SB connection is restored
ovn-controller tries to find the record corresponding to the chassis-id
it stored in memory. This fails causing ovn-controller to try to insert
a new record. But at this point a constraint violation is hit in the SB
because the Encap records of the "stale" chassis still exist in the DB,
along with the old chassis record.

This commit changes the way we search for a "stale" chassis record in the
SB to cover the above mentioned case. Also, in such cases there's no need
to recreate the Encaps, it's sufficient to update the chassis_name field.

Fixes: 5344f24ecb1a ("ovn-controller: Refactor chassis.c to abstract the string parsing")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>

(cherry-picked from master commit 94a32fca2d2b825fece0ef5b1873459bd9857dd3)

(cherry picked from upstream commit 0716c4f4cf2d97f03c1f2e5099fece92f3183d43)

Change-Id: I20f0c61fddffba599b16f43d15b110414a6a6e6b
---
 controller/chassis.c    | 60 +++++++++++++++++++++++++++++++------------------
 tests/ovn-controller.at | 17 ++++++++++++++
 2 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 6ac591e..773d966 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -397,10 +397,7 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
 {
     size_t encap_type_count = 0;
 
-    for (int i = 0; i < chassis_rec->n_encaps; i++) {
-        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
-            return true;
-        }
+    for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
 
         if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
             return true;
@@ -473,6 +470,19 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
 }
 
 /*
+ * Updates encaps for a given chassis. This can happen when the chassis
+ * name has changed. Also, the only thing we support updating is the
+ * chassis_name. For other changes the encaps will be recreated.
+ */
+static void
+chassis_update_encaps(const struct sbrec_chassis *chassis)
+{
+    for (size_t i = 0; i < chassis->n_encaps; i++) {
+        sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
+    }
+}
+
+/*
  * Returns a pointer to a chassis record from 'chassis_table' that
  * matches at least one tunnel config.
  */
@@ -503,9 +513,10 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
 /* If this is a chassis config update after we initialized the record once
  * then we should always be able to find it with the ID we saved in
  * chassis_state.
- * Otherwise (i.e., first time we create the record) then we check if there's
- * a stale record from a previous controller run that didn't end gracefully
- * and reuse it. If not then we create a new record.
+ * Otherwise (i.e., first time we create the record or if the system-id
+ * changed) then we check if there's a stale record from a previous
+ * controller run that didn't end gracefully and reuse it. If not then we
+ * create a new record.
  *
  * Sets '*chassis_rec' to point to the local chassis record.
  * Returns true if this record was already in the database, false if it was
@@ -519,28 +530,32 @@ chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    const char *chassis_id,
                    const struct sbrec_chassis **chassis_rec)
 {
+    const struct sbrec_chassis *chassis = NULL;
+
     if (chassis_info_id_inited(&chassis_state)) {
-        *chassis_rec = chassis_lookup_by_name(sbrec_chassis_by_name,
-                                              chassis_info_id(&chassis_state));
-        if (!(*chassis_rec)) {
-            VLOG_DBG("Could not find Chassis, will create it"
-                     ": stored (%s) ovs (%s)",
+        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
+                                         chassis_info_id(&chassis_state));
+        if (!chassis) {
+            VLOG_DBG("Could not find Chassis, will check if the id changed: "
+                     "stored (%s) ovs (%s)",
                      chassis_info_id(&chassis_state), chassis_id);
-            if (ovnsb_idl_txn) {
-                /* Recreate the chassis record.  */
-                *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
-                return false;
-            }
         }
-    } else {
-        *chassis_rec =
-            chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
+    }
 
-        if (!(*chassis_rec) && ovnsb_idl_txn) {
+    if (!chassis) {
+        chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
+    }
+
+    if (!chassis) {
+        /* Recreate the chassis record. */
+        VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
+        if (ovnsb_idl_txn) {
             *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
-            return false;
         }
+        return false;
     }
+
+    *chassis_rec = chassis;
     return true;
 }
 
@@ -602,6 +617,7 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
                                 &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
                                 chassis_rec);
     if (!tunnels_changed) {
+        chassis_update_encaps(chassis_rec);
         return updated;
     }
 
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 1b96934..f2faf1f 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -196,6 +196,23 @@ OVS_WAIT_UNTIL([
     test "${sysid}" = "${chassis_id}"
 ])
 
+# Simulate system-id changing while ovn-controller is disconnected from the
+# SB.
+valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
+invalid_remote=tcp:0.0.0.0:4242
+ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
+expected_state="not connected"
+OVS_WAIT_UNTIL([
+    test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
+])
+sysid=${sysid}-bar
+ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
+ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
+OVS_WAIT_UNTIL([
+    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
+    test "${sysid}" = "${chassis_id}"
+])
+
 # Gracefully terminate daemons
 OVN_CLEANUP_SBOX([hv])
 OVN_CLEANUP_VSWITCH([main])
-- 
1.8.3.1