Blob Blame History Raw
From f1b715c3f0f222cbf6bf07733792a16c6442a085 Mon Sep 17 00:00:00 2001
From: Numan Siddique <numans@ovn.org>
Date: Fri, 13 Nov 2020 23:44:10 +0530
Subject: [PATCH 01/16] Provide the option to pin ovn-controller and ovn-northd
 to a specific version.

OVN recommends updating/upgrading ovn-controllers first and then
ovn-northd and OVN DB ovsdb-servers.  This is to ensure that any
new functionality specified by the database or logical flows created
by ovn-northd is understood by ovn-controller.

However certain deployments may upgrade ovn-northd services first and
then ovn-controllers.  In a large scal deployment, this can result in
downtime during upgrades as old ovn-controllers may not understand
new logical flows or new actions added by ovn-northd.

Even upgrading ovn-controllers first can result in ovn-controllers
rejecting some of the logical flows if an existing OVN action is
changed.  One such example is ct_commit action which recently was updated
to take new arguments.

To avoid such downtimes during upgrades, this patch adds the
functionality of pinning ovn-controller and ovn-northd to a specific
version.  An internal OVN version is generated and this version is stored
by ovn-northd in the Southbound SB_Global table's
options:northd_internal_version.

When ovn-controller notices that the internal version has changed, it
stops handling the database changes - both Southbound and OVS. All the
existing OF flows are preserved.  When ovn-controller is upgraded to the
same version as ovn-northd services, it will process the database
changes.

This feature is made optional and disabled by default.  A CMS can
enable it by configuring the OVS local database with the option -
ovn-match-northd-version=true.

Change-Id: I5f6c693953db1a5532b62cf0a7d588f78bb353c1
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
---
 .../contributing/submitting-patches.rst       |  12 ++
 NEWS                                          |   3 +
 controller/ovn-controller.8.xml               |  11 ++
 controller/ovn-controller.c                   |  52 ++++++-
 include/ovn/actions.h                         |   4 +
 lib/ovn-util.c                                |  14 ++
 lib/ovn-util.h                                |   4 +
 northd/ovn-northd.c                           |  18 ++-
 tests/ovn.at                                  | 147 ++++++++++++++++++
 9 files changed, 260 insertions(+), 5 deletions(-)

diff --git a/Documentation/internals/contributing/submitting-patches.rst b/Documentation/internals/contributing/submitting-patches.rst
index 0a9de5866..31a3ca747 100644
--- a/Documentation/internals/contributing/submitting-patches.rst
+++ b/Documentation/internals/contributing/submitting-patches.rst
@@ -397,6 +397,18 @@ Remember to follow-up and actually remove the feature from OVN codebase once
 deprecation grace period has expired and users had opportunity to use at least
 one OVN release that would have informed them about feature deprecation!
 
+OVN upgrades
+------------
+
+If the patch introduces any new OVN actions or updates existing OVN actions,
+then make sure to check the function ovn_get_internal_version() in
+lib/ovn-util.c and increment the macro - OVN_INTERNAL_MINOR_VER.
+
+Adding new OVN actions or changing existing OVN actions can have datapath
+disruptions during OVN upgrades. To minimize disruptions, OVN supports
+version matching between ovn-northd and ovn-controller and it is important
+to update the internal OVN version when the patch introduces such changes.
+
 Comments
 --------
 
diff --git a/NEWS b/NEWS
index 46140208f..5968ca341 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,9 @@ Post-v20.09.0
 ---------------------
    - Support other_config:vlan-passthru=true to allow VLAN tagged incoming
      traffic.
+   - Support version pinning between ovn-northd and ovn-controller as an
+     option. If the option is enabled and the versions don't match,
+     ovn-controller will not process any DB changes.
 
 OVN v20.09.0 - 28 Sep 2020
 --------------------------
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index 16bc47b20..b5c0800f0 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -233,6 +233,17 @@
         The boolean flag indicates if the chassis is used as an
         interconnection gateway.
       </dd>
+
+      <dt><code>external_ids:ovn-match-northd-version</code></dt>
+      <dd>
+        The boolean flag indicates if <code>ovn-controller</code> needs to
+        check <code>ovn-northd</code> version. If this
+        flag is set to true and the <code>ovn-northd's</code> version (reported
+        in the Southbound database) doesn't match with the
+        <code>ovn-controller's</code> internal version, then it will stop
+        processing the southbound and local Open vSwitch database changes.
+        The default value is considered false if this option is not defined.
+      </dd>
     </dl>
 
     <p>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index e5479cf3e..faae787f3 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -2159,6 +2159,49 @@ struct ovn_controller_exit_args {
     bool *restart;
 };
 
+/* Returns false if the northd internal version stored in SB_Global
+ * and ovn-controller internal version don't match.
+ */
+static bool
+check_northd_version(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl,
+                     const char *version)
+{
+    static bool version_mismatch;
+
+    const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl);
+    if (!cfg || !smap_get_bool(&cfg->external_ids, "ovn-match-northd-version",
+                               false)) {
+        version_mismatch = false;
+        return true;
+    }
+
+    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ovnsb_idl);
+    if (!sb) {
+        version_mismatch = true;
+        return false;
+    }
+
+    const char *northd_version =
+        smap_get_def(&sb->options, "northd_internal_version", "");
+
+    if (strcmp(northd_version, version)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_WARN_RL(&rl, "controller version - %s mismatch with northd "
+                     "version - %s", version, northd_version);
+        version_mismatch = true;
+        return false;
+    }
+
+    /* If there used to be a mismatch and ovn-northd got updated, force a
+     * full recompute.
+     */
+    if (version_mismatch) {
+        engine_set_force_recompute(true);
+    }
+    version_mismatch = false;
+    return true;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -2453,6 +2496,9 @@ main(int argc, char *argv[])
         .enable_lflow_cache = true
     };
 
+    char *ovn_version = ovn_get_internal_version();
+    VLOG_INFO("OVN internal version is : [%s]", ovn_version);
+
     /* Main loop. */
     exiting = false;
     restart = false;
@@ -2506,7 +2552,9 @@ main(int argc, char *argv[])
 
         engine_set_context(&eng_ctx);
 
-        if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl)) {
+        if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl) &&
+            check_northd_version(ovs_idl_loop.idl, ovnsb_idl_loop.idl,
+                                 ovn_version)) {
             /* Contains the transport zones that this Chassis belongs to */
             struct sset transport_zones = SSET_INITIALIZER(&transport_zones);
             sset_from_delimited_string(&transport_zones,
@@ -2795,6 +2843,7 @@ loop_done:
         }
     }
 
+    free(ovn_version);
     unixctl_server_destroy(unixctl);
     lflow_destroy();
     ofctrl_destroy();
@@ -2847,6 +2896,7 @@ parse_options(int argc, char *argv[])
 
         case 'V':
             ovs_print_version(OFP15_VERSION, OFP15_VERSION);
+            printf("SB DB Schema %s\n", sbrec_get_db_version());
             exit(EXIT_SUCCESS);
 
         VLOG_OPTION_HANDLERS
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 630bbe79e..7ba24cd60 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -48,6 +48,10 @@ struct ovn_extend_table;
  *    action.  Its first member must have type "struct ovnact" and name
  *    "ovnact".  The structure must have a fixed length, that is, it may not
  *    end with a flexible array member.
+ *
+ * These OVNACTS are used to generate the OVN internal version.   See
+ * ovn_get_internal_version() in lib/ovn-util.c.  If any OVNACT is updated,
+ * increment the OVN_INTERNAL_MINOR_VER macro in lib/ovn-util.c.
  */
 #define OVNACTS                                       \
     OVNACT(OUTPUT,            ovnact_null)            \
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 8722f7a48..e94a16422 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -20,6 +20,7 @@
 #include <unistd.h>
 
 #include "daemon.h"
+#include "include/ovn/actions.h"
 #include "openvswitch/ofp-parse.h"
 #include "openvswitch/vlog.h"
 #include "ovn-dirs.h"
@@ -720,3 +721,16 @@ ip_address_and_port_from_lb_key(const char *key, char **ip_address,
     *addr_family = ss.ss_family;
     return true;
 }
+
+/* Increment this for any logical flow changes or if existing OVN action is
+ * modified. */
+#define OVN_INTERNAL_MINOR_VER 0
+
+/* Returns the OVN version. The caller must free the returned value. */
+char *
+ovn_get_internal_version(void)
+{
+    return xasprintf("%s-%s-%d.%d", OVN_PACKAGE_VERSION,
+                     sbrec_get_db_version(),
+                     N_OVNACTS, OVN_INTERNAL_MINOR_VER);
+}
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index f72a801df..aa737a20c 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -231,4 +231,8 @@ char *str_tolower(const char *orig);
 bool ip_address_and_port_from_lb_key(const char *key, char **ip_address,
                                      uint16_t *port, int *addr_family);
 
+/* Returns the internal OVN version. The caller must free the returned
+ * value. */
+char *ovn_get_internal_version(void);
+
 #endif
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index bb31e04fa..3884e08eb 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -11939,7 +11939,8 @@ ovnnb_db_run(struct northd_context *ctx,
              struct ovsdb_idl_loop *sb_loop,
              struct hmap *datapaths, struct hmap *ports,
              struct ovs_list *lr_list,
-             int64_t loop_start_time)
+             int64_t loop_start_time,
+             const char *ovn_internal_version)
 {
     if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
         return;
@@ -12016,6 +12017,8 @@ ovnnb_db_run(struct northd_context *ctx,
     smap_replace(&options, "max_tunid", max_tunid);
     free(max_tunid);
 
+    smap_replace(&options, "northd_internal_version", ovn_internal_version);
+
     nbrec_nb_global_verify_options(nb);
     nbrec_nb_global_set_options(nb, &options);
 
@@ -12626,7 +12629,8 @@ ovnsb_db_run(struct northd_context *ctx,
 static void
 ovn_db_run(struct northd_context *ctx,
            struct ovsdb_idl_index *sbrec_chassis_by_name,
-           struct ovsdb_idl_loop *ovnsb_idl_loop)
+           struct ovsdb_idl_loop *ovnsb_idl_loop,
+           const char *ovn_internal_version)
 {
     struct hmap datapaths, ports;
     struct ovs_list lr_list;
@@ -12636,7 +12640,8 @@ ovn_db_run(struct northd_context *ctx,
 
     int64_t start_time = time_wall_msec();
     ovnnb_db_run(ctx, sbrec_chassis_by_name, ovnsb_idl_loop,
-                 &datapaths, &ports, &lr_list, start_time);
+                 &datapaths, &ports, &lr_list, start_time,
+                 ovn_internal_version);
     ovnsb_db_run(ctx, ovnsb_idl_loop, &ports, start_time);
     destroy_datapaths_and_ports(&datapaths, &ports, &lr_list);
 }
@@ -13003,6 +13008,9 @@ main(int argc, char *argv[])
     unixctl_command_register("sb-connection-status", "", 0, 0,
                              ovn_conn_show, ovnsb_idl_loop.idl);
 
+    char *ovn_internal_version = ovn_get_internal_version();
+    VLOG_INFO("OVN internal version is : [%s]", ovn_internal_version);
+
     /* Main loop. */
     exiting = false;
     state.had_lock = false;
@@ -13044,7 +13052,8 @@ main(int argc, char *argv[])
             }
 
             if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
-                ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
+                ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop,
+                           ovn_internal_version);
                 if (ctx.ovnsb_txn) {
                     check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
                     check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
@@ -13106,6 +13115,7 @@ main(int argc, char *argv[])
         }
     }
 
+    free(ovn_internal_version);
     unixctl_server_destroy(unixctl);
     ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
     ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
diff --git a/tests/ovn.at b/tests/ovn.at
index 8f18ca9e5..f771b7563 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -23235,3 +23235,150 @@ OVS_WAIT_UNTIL(
 
 OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
+
+AT_SETUP([ovn -- check ovn-northd and ovn-controller version pinning])
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.10
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl lsp-add sw0 sw0-p1
+check ovn-nbctl lsp-add sw0 sw0-p2
+
+as hv1
+ovs-vsctl \
+    -- add-port br-int vif1 \
+    -- set Interface vif1 external_ids:iface-id=sw0-p1 \
+    ofport-request=1
+ovs-vsctl \
+    -- add-port br-int vif2 \
+    -- set Interface vif2 external_ids:iface-id=sw0-p2 \
+    ofport-request=2
+
+# Wait for port to be bound.
+wait_row_count Chassis 1 name=hv1
+ch=$(fetch_column Chassis _uuid name=hv1)
+wait_row_count Port_Binding 1 logical_port=sw0-p1 chassis=$ch
+wait_row_count Port_Binding 1 logical_port=sw0-p2 chassis=$ch
+
+northd_version=$(ovn-sbctl get SB_Global . options:northd_internal_version | sed s/\"//g)
+echo "northd version = $northd_version"
+AT_CHECK([grep -c $northd_version hv1/ovn-controller.log], [0], [1
+])
+
+# Stop ovn-northd so that we can modify the northd_version.
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as northd-backup
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+check ovn-sbctl set SB_Global . options:northd_internal_version=foo
+
+as hv1
+check ovs-vsctl set interface vif2 external_ids:iface-id=foo
+
+# ovn-controller should release the lport sw0-p2 since ovn-match-northd-version
+# is not true.
+wait_row_count Port_Binding 1 logical_port=sw0-p2 'chassis=[[]]'
+
+as hv1 ovs-ofctl dump-flows br-int table=0 > offlows_table0.txt
+AT_CAPTURE_FILE([offlows_table0.txt])
+AT_CHECK_UNQUOTED([grep -c "in_port=2" offlows_table0.txt], [1], [dnl
+0
+])
+
+echo
+echo "__file__:__line__: Pin ovn-controller to ovn-northd version."
+
+as hv1
+check ovs-vsctl set open . external_ids:ovn-match-northd-version=true
+
+OVS_WAIT_UNTIL(
+    [test 1 = $(grep -c "controller version - $northd_version mismatch with northd version - foo" hv1/ovn-controller.log)
+])
+
+as hv1
+check ovs-vsctl set interface vif2 external_ids:iface-id=sw0-p2
+
+# ovn-controller should not claim sw0-p2 since there is version mismatch
+as hv1 ovn-appctl -t ovn-controller recompute
+wait_row_count Port_Binding 1 logical_port=sw0-p2 'chassis=[[]]'
+
+as hv1 ovs-ofctl dump-flows br-int table=0 > offlows_table0.txt
+AT_CAPTURE_FILE([offlows_table0.txt])
+AT_CHECK_UNQUOTED([grep -c "in_port=2" offlows_table0.txt], [1], [dnl
+0
+])
+
+check ovn-sbctl set SB_Global . options:northd_internal_version=$northd_version
+
+# It should claim sw0-p2
+wait_row_count Port_Binding 1 logical_port=sw0-p2 chassis=$ch
+
+as hv1 ovs-ofctl dump-flows br-int table=0 > offlows_table0.txt
+AT_CAPTURE_FILE([offlows_table0.txt])
+AT_CHECK_UNQUOTED([grep -c "in_port=2" offlows_table0.txt], [0], [dnl
+1
+])
+
+as hv1
+ovn_remote=$(ovs-vsctl get open . external_ids:ovn-remote | sed s/\"//g)
+ovs-vsctl set open . external_ids:ovn-remote=unix:foo
+check ovs-vsctl set interface vif2 external_ids:iface-id=foo
+
+# ovn-controller is not connected to the SB DB. Even though it
+# releases sw0-p2, it will not delete the OF flows.
+as hv1 ovs-ofctl dump-flows br-int table=0 > offlows_table0.txt
+AT_CAPTURE_FILE([offlows_table0.txt])
+AT_CHECK_UNQUOTED([grep -c "in_port=2" offlows_table0.txt], [0], [dnl
+1
+])
+
+# Change the version to incorrect one and reconnect to the SB DB.
+check ovn-sbctl set SB_Global . options:northd_internal_version=bar
+
+as hv1
+check ovs-vsctl set open . external_ids:ovn-remote=$ovn_remote
+
+sleep 1
+
+as hv1 ovs-ofctl dump-flows br-int table=0 > offlows_table0.txt
+AT_CAPTURE_FILE([offlows_table0.txt])
+AT_CHECK_UNQUOTED([grep -c "in_port=2" offlows_table0.txt], [0], [dnl
+1
+])
+
+wait_row_count Port_Binding 1 logical_port=sw0-p2 chassis=$ch
+
+# Change the ovn-remote to incorrect and set the correct northd version
+# and then change back to the correct ovn-remote
+as hv1
+check ovs-vsctl set open . external_ids:ovn-remote=unix:foo
+
+check ovn-sbctl set SB_Global . options:northd_internal_version=$northd_version
+
+as hv1
+check ovs-vsctl set open . external_ids:ovn-remote=$ovn_remote
+
+wait_row_count Port_Binding 1 logical_port=sw0-p2 'chassis=[[]]'
+as hv1 ovs-ofctl dump-flows br-int table=0 > offlows_table0.txt
+AT_CAPTURE_FILE([offlows_table0.txt])
+AT_CHECK_UNQUOTED([grep -c "in_port=2" offlows_table0.txt], [1], [dnl
+0
+])
+
+as hv1
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+AT_CLEANUP
-- 
2.28.0