diff --git a/.ovn.metadata b/.ovn.metadata index 6f36ca7..1121eb0 100644 --- a/.ovn.metadata +++ b/.ovn.metadata @@ -4,3 +4,4 @@ f56373e54eec629b9d6e88e8b1c0c880bd498809 SOURCES/ovn-20.12.0.tar.gz d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz 6beb30f18ffac3de7689b7fd63e9a8a7d9c8df3a SOURCES/Sphinx-1.1.3.tar.gz +b7cb5bddcefce929e60e4533da84d13dc8ce4fd0 SOURCES/openvswitch-ac85cdb.tar.gz diff --git a/SOURCES/gen_config_group.sh b/SOURCES/gen_config_group.sh index 651a0c5..d1c06fe 100755 --- a/SOURCES/gen_config_group.sh +++ b/SOURCES/gen_config_group.sh @@ -207,10 +207,10 @@ do done popd >/dev/null -echo -n "For each arch ( " +printf "For each arch ( " for ((i=0; i < ${#OVS_DPDK_CONF_MACH_ARCH[@]}; i++)); do - echo -n "${OVS_DPDK_CONF_MACH_ARCH[i]} " + printf "${OVS_DPDK_CONF_MACH_ARCH[i]} " done echo "):" echo "1. ensure you enable the requisite hw" diff --git a/SOURCES/ovn-20.12.0.patch b/SOURCES/ovn-20.12.0.patch index b8ea6d5..be57187 100644 --- a/SOURCES/ovn-20.12.0.patch +++ b/SOURCES/ovn-20.12.0.patch @@ -196,7 +196,7 @@ index 65b1f4a40..cee99c63d 100644 By default all files are installed under ``/usr/local``. OVN expects to find its database in ``/usr/local/etc/ovn`` by default. 
diff --git a/Makefile.am b/Makefile.am -index 7ce3d27e4..04a6d7c63 100644 +index 7ce3d27e4..e6ed719ec 100644 --- a/Makefile.am +++ b/Makefile.am @@ -48,6 +48,8 @@ AM_CFLAGS = -Wstrict-prototypes @@ -227,16 +227,17 @@ index 7ce3d27e4..04a6d7c63 100644 SUFFIXES += .in .in: -@@ -216,6 +221,8 @@ dist-hook-git: distfiles +@@ -216,6 +221,9 @@ dist-hook-git: distfiles @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1; then \ (cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' | \ grep -v '\.gitattributes$$' | \ + grep -v '\.gitmodules$$' | \ + grep -v "$(submodules)" | \ ++ grep -v 'redhat' | \ LC_ALL=C sort -u > all-gitfiles; \ LC_ALL=C comm -1 -3 distfiles all-gitfiles > missing-distfiles; \ if test -s missing-distfiles; then \ -@@ -247,8 +254,8 @@ ALL_LOCAL += config-h-check +@@ -247,8 +255,8 @@ ALL_LOCAL += config-h-check config-h-check: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ @@ -247,7 +248,7 @@ index 7ce3d27e4..04a6d7c63 100644 then \ echo "See above for list of violations of the rule that"; \ echo "every C source file must #include ."; \ -@@ -261,8 +268,7 @@ ALL_LOCAL += printf-check +@@ -261,8 +269,7 @@ ALL_LOCAL += printf-check printf-check: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ @@ -257,7 +258,7 @@ index 7ce3d27e4..04a6d7c63 100644 then \ echo "See above for list of violations of the rule that"; \ echo "'z', 't', 'j', 'hh' printf() type modifiers are"; \ -@@ -288,7 +294,7 @@ ALL_LOCAL += check-assert-h-usage +@@ -288,7 +295,7 @@ ALL_LOCAL += check-assert-h-usage check-assert-h-usage: @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \ (cd $(srcdir) && git --no-pager grep -l -E '[<]assert.h[>]') | \ @@ -266,7 +267,7 @@ index 7ce3d27e4..04a6d7c63 100644 then \ echo "Files listed above unexpectedly #include <""assert.h"">."; \ echo "Please use ovs_assert (from util.h) instead of assert."; \ -@@ -304,8 +310,7 @@ ALL_LOCAL += check-endian +@@ -304,8 +311,7 @@ ALL_LOCAL 
+= check-endian check-endian: @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \ (cd $(srcdir) && git --no-pager grep -l -E \ @@ -276,19 +277,19 @@ index 7ce3d27e4..04a6d7c63 100644 then \ echo "See above for list of files that misuse LITTLE""_ENDIAN"; \ echo "or BIG""_ENDIAN. Please use WORDS_BIGENDIAN instead."; \ -@@ -329,9 +334,9 @@ check-tabs: +@@ -329,9 +335,9 @@ check-tabs: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ grep -ln "^ " \ - `git ls-files \ -+ `git ls-files | grep -v $(submodules) \ ++ `git ls-files | grep -v $(submodules) | grep -v redhat \ | grep -v -f build-aux/initial-tab-whitelist` /dev/null \ - | $(EGREP) -v ':[ ]*/?\*'; \ + | $(EGREP) -v ':[ ]*/?\*'; \ then \ echo "See above for files that use tabs for indentation."; \ echo "Please use spaces instead."; \ -@@ -344,8 +349,7 @@ thread-safety-check: +@@ -344,8 +350,7 @@ thread-safety-check: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ grep -n -f build-aux/thread-safety-blacklist \ @@ -298,7 +299,7 @@ index 7ce3d27e4..04a6d7c63 100644 | $(EGREP) -v ':[ ]*/?\*'; \ then \ echo "See above for list of calls to functions that are"; \ -@@ -361,7 +365,7 @@ ALL_LOCAL += check-ifconfig +@@ -361,7 +366,7 @@ ALL_LOCAL += check-ifconfig check-ifconfig: @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \ (cd $(srcdir) && git --no-pager grep -l -E -e 'ifconfig' | \ @@ -308,10 +309,10 @@ index 7ce3d27e4..04a6d7c63 100644 echo "See above for list of files that use or reference"; \ echo "'ifconfig'. Please use 'ip' instead."; \ diff --git a/NEWS b/NEWS -index f71ec329c..57a9ba939 100644 +index f71ec329c..ae34dab73 100644 --- a/NEWS +++ b/NEWS -@@ -1,3 +1,19 @@ +@@ -1,3 +1,24 @@ +Post-v20.12.0 +------------------------- + - Support ECMP multiple nexthops for reroute router policies. 
@@ -327,6 +328,11 @@ index f71ec329c..57a9ba939 100644 + - Add a new option to Load_Balancer.options, "hairpin_snat_ip", to allow + users to explicitly select which source IP should be used for load + balancer hairpin traffic. ++ - ovn-controller: Add configuration knobs, through OVS external-id ++ "ovn-limit-lflow-cache" and "ovn-memlimit-lflow-cache-kb", to allow ++ enforcing a limit for the size of the logical flow cache based on ++ maximum number of entries and/or memory usage. ++ - ovn-controller: Add lflow cache related memory reports. + OVN v20.12.0 - 18 Dec 2020 -------------------------- @@ -373,6 +379,18 @@ index 000000000..1b980df4f +./configure --enable-Werror --enable-sparse +make -j5 + +diff --git a/configure.ac b/configure.ac +index af952453b..ec5060f1a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -96,6 +96,7 @@ OVN_CHECK_DOT + OVS_CHECK_IF_DL + OVS_CHECK_STRTOK_R + AC_CHECK_DECLS([sys_siglist], [], [], [[#include ]]) ++AC_CHECK_DECLS([malloc_trim], [], [], [[#include ]]) + AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec], + [], [], [[#include ]]) + AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include ]]) diff --git a/controller-vtep/binding.c b/controller-vtep/binding.c index 83377157e..01d5a16d2 100644 --- a/controller-vtep/binding.c @@ -390,11 +408,61 @@ index 83377157e..01d5a16d2 100644 } } +diff --git a/controller-vtep/ovn-controller-vtep.c b/controller-vtep/ovn-controller-vtep.c +index c13280bc0..1d35c7f04 100644 +--- a/controller-vtep/ovn-controller-vtep.c ++++ b/controller-vtep/ovn-controller-vtep.c +@@ -25,9 +25,11 @@ + #include "compiler.h" + #include "daemon.h" + #include "dirs.h" ++#include "memory.h" + #include "openvswitch/dynamic-string.h" + #include "fatal-signal.h" + #include "openvswitch/poll-loop.h" ++#include "simap.h" + #include "stream.h" + #include "stream-ssl.h" + #include "unixctl.h" +@@ -99,12 +101,21 @@ main(int argc, char *argv[]) + .ovnsb_idl_txn = 
ovsdb_idl_loop_run(&ovnsb_idl_loop), + }; + ++ memory_run(); ++ if (memory_should_report()) { ++ struct simap usage = SIMAP_INITIALIZER(&usage); ++ ++ /* Nothing special to report yet. */ ++ memory_report(&usage); ++ simap_destroy(&usage); ++ } + gateway_run(&ctx); + binding_run(&ctx); + vtep_run(&ctx); + unixctl_server_run(unixctl); + + unixctl_server_wait(unixctl); ++ memory_wait(); + if (exiting) { + poll_immediate_wake(); + } diff --git a/controller/automake.mk b/controller/automake.mk -index 45e1bdd36..9b8debd2f 100644 +index 45e1bdd36..2f6c50890 100644 --- a/controller/automake.mk +++ b/controller/automake.mk -@@ -18,6 +18,8 @@ controller_ovn_controller_SOURCES = \ +@@ -10,14 +10,20 @@ controller_ovn_controller_SOURCES = \ + controller/encaps.h \ + controller/ha-chassis.c \ + controller/ha-chassis.h \ ++ controller/if-status.c \ ++ controller/if-status.h \ + controller/ip-mcast.c \ + controller/ip-mcast.h \ + controller/lflow.c \ + controller/lflow.h \ ++ controller/lflow-cache.c \ ++ controller/lflow-cache.h \ + controller/lport.c \ controller/lport.h \ controller/ofctrl.c \ controller/ofctrl.h \ @@ -403,7 +471,7 @@ index 45e1bdd36..9b8debd2f 100644 controller/pinctrl.c \ controller/pinctrl.h \ controller/patch.c \ -@@ -25,7 +27,10 @@ controller_ovn_controller_SOURCES = \ +@@ -25,7 +31,10 @@ controller_ovn_controller_SOURCES = \ controller/ovn-controller.c \ controller/ovn-controller.h \ controller/physical.c \ @@ -416,18 +484,18 @@ index 45e1bdd36..9b8debd2f 100644 man_MANS += controller/ovn-controller.8 EXTRA_DIST += controller/ovn-controller.8.xml diff --git a/controller/binding.c b/controller/binding.c -index cb60c5d67..4e6c75696 100644 +index cb60c5d67..31f3a210f 100644 --- a/controller/binding.c +++ b/controller/binding.c -@@ -18,6 +18,7 @@ +@@ -16,6 +16,7 @@ + #include + #include "binding.h" #include "ha-chassis.h" ++#include "if-status.h" #include "lflow.h" #include "lport.h" -+#include "ofctrl-seqno.h" #include "patch.h" - - #include "lib/bitmap.h" 
-@@ -34,6 +35,38 @@ +@@ -34,6 +35,12 @@ VLOG_DEFINE_THIS_MODULE(binding); @@ -437,61 +505,475 @@ index cb60c5d67..4e6c75696 100644 + */ +#define OVN_INSTALLED_EXT_ID "ovn-installed" + -+/* Set of OVS interface IDs that have been released in the most recent -+ * processing iterations. This gets updated in release_lport() and is -+ * periodically emptied in binding_seqno_run(). -+ */ -+static struct sset binding_iface_released_set = -+ SSET_INITIALIZER(&binding_iface_released_set); + #define OVN_QOS_TYPE "linux-htb" + + struct qos_queue { +@@ -564,6 +571,23 @@ remove_local_lport_ids(const struct sbrec_port_binding *pb, + } + } + ++/* Corresponds to each Port_Binding.type. */ ++enum en_lport_type { ++ LP_UNKNOWN, ++ LP_VIF, ++ LP_CONTAINER, ++ LP_PATCH, ++ LP_L3GATEWAY, ++ LP_LOCALNET, ++ LP_LOCALPORT, ++ LP_L2GATEWAY, ++ LP_VTEP, ++ LP_CHASSISREDIRECT, ++ LP_VIRTUAL, ++ LP_EXTERNAL, ++ LP_REMOTE ++}; + -+/* Set of OVS interface IDs that have been bound in the most recent -+ * processing iterations. This gets updated in release_lport() and is -+ * periodically emptied in binding_seqno_run(). + /* Local bindings. binding.c module binds the logical port (represented by + * Port_Binding rows) and sets the 'chassis' column when it sees the + * OVS interface row (of type "" or "internal") with the +@@ -575,126 +599,270 @@ remove_local_lport_ids(const struct sbrec_port_binding *pb, + * 'struct local_binding' is used. A shash of these local bindings is + * maintained with the 'external_ids:iface-id' as the key to the shash. + * +- * struct local_binding (defined in binding.h) has 3 main fields: +- * - type +- * - OVS interface row object +- * - Port_Binding row object +- * +- * An instance of 'struct local_binding' can be one of 3 types. +- * +- * BT_VIF: Represent a local binding for an OVS interface of +- * type "" or "internal" with the external_ids:iface-id +- * set. 
+- * +- * This can be a +- * * probable local binding - external_ids:iface-id is +- * set, but the corresponding Port_Binding row is not +- * created or is not visible to the local ovn-controller +- * instance. +- * +- * * a local binding - external_ids:iface-id is set and +- * which is already bound to the corresponding Port_Binding +- * row. +- * +- * It maintains a list of children +- * (of type BT_CONTAINER/BT_VIRTUAL) if any. +- * +- * BT_CONTAINER: Represents a local binding which has a parent of type +- * BT_VIF. Its Port_Binding row's 'parent' column is set to +- * its parent's Port_Binding. It shares the OVS interface row +- * with the parent. +- * Each ovn-controller when it sees a container Port_Binding, +- * it creates 'struct local_binding' for the parent +- * Port_Binding and for its even if the OVS interface row for +- * the parent is not present. +- * +- * BT_VIRTUAL: Represents a local binding which has a parent of type BT_VIF. +- * Its Port_Binding type is "virtual" and it shares the OVS +- * interface row with the parent. +- * Port_Binding of type "virtual" is claimed by pinctrl module +- * when it sees the ARP packet from the parent's VIF. +- * ++ * struct local_binding has 3 main fields: ++ * - name : 'external_ids:iface-id' of the OVS interface (key). ++ * - OVS interface row object. ++ * - List of 'binding_lport' objects with the primary lport ++ * in the front of the list (if present). + * + * An object of 'struct local_binding' is created: +- * - For each interface that has iface-id configured with the type - BT_VIF. ++ * - For each interface that has external_ids:iface-id configured. + * +- * - For each container Port Binding (of type BT_CONTAINER) and its +- * parent Port_Binding (of type BT_VIF), no matter if +- * they are bound to this chassis i.e even if OVS interface row for the +- * parent is not present. 
++ * - For each port binding (also referred as lport) of type 'LP_VIF' ++ * if it is a parent lport of container lports even if there is no ++ * corresponding OVS interface. + */ -+static struct sset binding_iface_bound_set = -+ SSET_INITIALIZER(&binding_iface_bound_set); ++struct local_binding { ++ char *name; ++ const struct ovsrec_interface *iface; ++ struct ovs_list binding_lports; ++}; + -+static void -+binding_iface_released_add(const char *iface_id) -+{ -+ sset_add(&binding_iface_released_set, iface_id); -+} ++/* This structure represents a logical port (or port binding) ++ * which is associated with 'struct local_binding'. ++ * ++ * An instance of 'struct binding_lport' is created for a logical port ++ * - If the OVS interface's iface-id corresponds to the logical port. ++ * - If it is a container or virtual logical port and its parent ++ * has a 'local binding'. + * +- * - For each 'virtual' Port Binding (of type BT_VIRTUAL) provided its parent +- * is bound to this chassis. + */ ++struct binding_lport { ++ struct ovs_list list_node; /* Node in local_binding.binding_lports. 
*/ + +-static struct local_binding * +-local_binding_create(const char *name, const struct ovsrec_interface *iface, +- const struct sbrec_port_binding *pb, +- enum local_binding_type type) ++ char *name; ++ const struct sbrec_port_binding *pb; ++ struct local_binding *lbinding; ++ enum en_lport_type type; ++}; + -+static void -+binding_iface_bound_add(const char *iface_id) -+{ -+ sset_add(&binding_iface_bound_set, iface_id); -+} ++static struct local_binding *local_binding_create( ++ const char *name, const struct ovsrec_interface *); ++static void local_binding_add(struct shash *local_bindings, ++ struct local_binding *); ++static struct local_binding *local_binding_find( ++ struct shash *local_bindings, const char *name); ++static void local_binding_destroy(struct local_binding *, ++ struct shash *binding_lports); ++static void local_binding_delete(struct local_binding *, ++ struct shash *local_bindings, ++ struct shash *binding_lports, ++ struct if_status_mgr *if_mgr); ++static struct binding_lport *local_binding_add_lport( ++ struct shash *binding_lports, ++ struct local_binding *, ++ const struct sbrec_port_binding *, ++ enum en_lport_type); ++static struct binding_lport *local_binding_get_primary_lport( ++ struct local_binding *); ++static bool local_binding_handle_stale_binding_lports( ++ struct local_binding *lbinding, struct binding_ctx_in *b_ctx_in, ++ struct binding_ctx_out *b_ctx_out, struct hmap *qos_map); ++ ++static struct binding_lport *binding_lport_create( ++ const struct sbrec_port_binding *, ++ struct local_binding *, enum en_lport_type); ++static void binding_lport_destroy(struct binding_lport *); ++static void binding_lport_delete(struct shash *binding_lports, ++ struct binding_lport *); ++static void binding_lport_add(struct shash *binding_lports, ++ struct binding_lport *); ++static void binding_lport_set_up(struct binding_lport *, bool sb_readonly); ++static void binding_lport_set_down(struct binding_lport *, bool sb_readonly); ++static 
struct binding_lport *binding_lport_find( ++ struct shash *binding_lports, const char *lport_name); ++static const struct sbrec_port_binding *binding_lport_get_parent_pb( ++ struct binding_lport *b_lprt); ++static struct binding_lport *binding_lport_check_and_cleanup( ++ struct binding_lport *, struct shash *b_lports); ++ ++static char *get_lport_type_str(enum en_lport_type lport_type); + - #define OVN_QOS_TYPE "linux-htb" ++void ++local_binding_data_init(struct local_binding_data *lbinding_data) + { +- struct local_binding *lbinding = xzalloc(sizeof *lbinding); +- lbinding->name = xstrdup(name); +- lbinding->type = type; +- lbinding->pb = pb; +- lbinding->iface = iface; +- shash_init(&lbinding->children); +- return lbinding; ++ shash_init(&lbinding_data->bindings); ++ shash_init(&lbinding_data->lports); + } - struct qos_queue { -@@ -688,6 +721,7 @@ local_binding_add_child(struct local_binding *lbinding, - struct local_binding *child) +-static void +-local_binding_add(struct shash *local_bindings, struct local_binding *lbinding) ++void ++local_binding_data_destroy(struct local_binding_data *lbinding_data) + { +- shash_add(local_bindings, lbinding->name, lbinding); ++ struct shash_node *node, *next; ++ ++ SHASH_FOR_EACH_SAFE (node, next, &lbinding_data->lports) { ++ struct binding_lport *b_lport = node->data; ++ binding_lport_destroy(b_lport); ++ shash_delete(&lbinding_data->lports, node); ++ } ++ ++ SHASH_FOR_EACH_SAFE (node, next, &lbinding_data->bindings) { ++ struct local_binding *lbinding = node->data; ++ local_binding_destroy(lbinding, &lbinding_data->lports); ++ shash_delete(&lbinding_data->bindings, node); ++ } ++ ++ shash_destroy(&lbinding_data->lports); ++ shash_destroy(&lbinding_data->bindings); + } + +-static void +-local_binding_destroy(struct local_binding *lbinding) ++const struct sbrec_port_binding * ++local_binding_get_primary_pb(struct shash *local_bindings, const char *pb_name) { - local_binding_add(&lbinding->children, child); -+ child->parent = 
lbinding; +- local_bindings_destroy(&lbinding->children); ++ struct local_binding *lbinding = ++ local_binding_find(local_bindings, pb_name); ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); + +- free(lbinding->name); +- free(lbinding); ++ return b_lport ? b_lport->pb : NULL; } - static struct local_binding * -@@ -697,6 +731,13 @@ local_binding_find_child(struct local_binding *lbinding, - return local_binding_find(&lbinding->children, child_name); +-void +-local_bindings_init(struct shash *local_bindings) ++bool ++local_binding_is_up(struct shash *local_bindings, const char *pb_name) + { +- shash_init(local_bindings); ++ struct local_binding *lbinding = ++ local_binding_find(local_bindings, pb_name); ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); ++ if (lbinding && b_lport && lbinding->iface) { ++ if (b_lport->pb->n_up && !b_lport->pb->up[0]) { ++ return false; ++ } ++ return smap_get_bool(&lbinding->iface->external_ids, ++ OVN_INSTALLED_EXT_ID, false); ++ } ++ return false; } -+static void -+local_binding_delete_child(struct local_binding *lbinding, -+ struct local_binding *child) -+{ -+ shash_find_and_delete(&lbinding->children, child->name); -+} +-void +-local_bindings_destroy(struct shash *local_bindings) ++bool ++local_binding_is_down(struct shash *local_bindings, const char *pb_name) + { +- struct shash_node *node, *next; +- SHASH_FOR_EACH_SAFE (node, next, local_bindings) { +- struct local_binding *lbinding = node->data; +- local_binding_destroy(lbinding); +- shash_delete(local_bindings, node); ++ struct local_binding *lbinding = ++ local_binding_find(local_bindings, pb_name); ++ ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); ++ ++ if (!lbinding) { ++ return true; + } + +- shash_destroy(local_bindings); ++ if (lbinding->iface && smap_get_bool(&lbinding->iface->external_ids, ++ OVN_INSTALLED_EXT_ID, false)) { ++ return false; ++ } ++ ++ if (b_lport && b_lport->pb->n_up 
&& b_lport->pb->up[0]) { ++ return false; ++ } ++ ++ return true; + } + +-static +-void local_binding_delete(struct shash *local_bindings, +- struct local_binding *lbinding) ++void ++local_binding_set_up(struct shash *local_bindings, const char *pb_name, ++ bool sb_readonly, bool ovs_readonly) + { +- shash_find_and_delete(local_bindings, lbinding->name); +- local_binding_destroy(lbinding); ++ struct local_binding *lbinding = ++ local_binding_find(local_bindings, pb_name); ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); ++ ++ if (!ovs_readonly && lbinding && lbinding->iface ++ && !smap_get_bool(&lbinding->iface->external_ids, ++ OVN_INSTALLED_EXT_ID, false)) { ++ ovsrec_interface_update_external_ids_setkey(lbinding->iface, ++ OVN_INSTALLED_EXT_ID, ++ "true"); ++ } ++ ++ if (!sb_readonly && lbinding && b_lport && b_lport->pb->n_up) { ++ binding_lport_set_up(b_lport, sb_readonly); ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ binding_lport_set_up(b_lport, sb_readonly); ++ } ++ } + } + +-static void +-local_binding_add_child(struct local_binding *lbinding, +- struct local_binding *child) ++void ++local_binding_set_down(struct shash *local_bindings, const char *pb_name, ++ bool sb_readonly, bool ovs_readonly) + { +- local_binding_add(&lbinding->children, child); ++ struct local_binding *lbinding = ++ local_binding_find(local_bindings, pb_name); ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); ++ ++ if (!ovs_readonly && lbinding && lbinding->iface ++ && smap_get_bool(&lbinding->iface->external_ids, ++ OVN_INSTALLED_EXT_ID, false)) { ++ ovsrec_interface_update_external_ids_delkey(lbinding->iface, ++ OVN_INSTALLED_EXT_ID); ++ } ++ ++ if (!sb_readonly && b_lport && b_lport->pb->n_up) { ++ binding_lport_set_down(b_lport, sb_readonly); ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ binding_lport_set_down(b_lport, sb_readonly); ++ } ++ } + } + +-static struct local_binding 
* +-local_binding_find_child(struct local_binding *lbinding, +- const char *child_name) ++void ++binding_dump_local_bindings(struct local_binding_data *lbinding_data, ++ struct ds *out_data) + { +- return local_binding_find(&lbinding->children, child_name); ++ const struct shash_node **nodes; ++ ++ nodes = shash_sort(&lbinding_data->bindings); ++ size_t n = shash_count(&lbinding_data->bindings); ++ ++ ds_put_cstr(out_data, "Local bindings:\n"); ++ for (size_t i = 0; i < n; i++) { ++ const struct shash_node *node = nodes[i]; ++ struct local_binding *lbinding = node->data; ++ size_t num_lports = ovs_list_size(&lbinding->binding_lports); ++ ds_put_format(out_data, "name: [%s], OVS interface name : [%s], " ++ "num binding lports : [%"PRIuSIZE"]\n", ++ lbinding->name, ++ lbinding->iface ? lbinding->iface->name : "NULL", ++ num_lports); ++ ++ if (num_lports) { ++ struct shash child_lports = SHASH_INITIALIZER(&child_lports); ++ struct binding_lport *primary_lport = NULL; ++ struct binding_lport *b_lport; ++ bool first_elem = true; ++ ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ if (first_elem && b_lport->type == LP_VIF) { ++ primary_lport = b_lport; ++ } else { ++ shash_add(&child_lports, b_lport->name, b_lport); ++ } ++ first_elem = false; ++ } ++ ++ if (primary_lport) { ++ ds_put_format(out_data, "primary lport : [%s]\n", ++ primary_lport->name); ++ } else { ++ ds_put_format(out_data, "no primary lport\n"); ++ } ++ ++ if (!shash_is_empty(&child_lports)) { ++ const struct shash_node **c_nodes = ++ shash_sort(&child_lports); ++ for (size_t j = 0; j < shash_count(&child_lports); j++) { ++ b_lport = c_nodes[j]->data; ++ ds_put_format(out_data, "child lport[%"PRIuSIZE"] : [%s], " ++ "type : [%s]\n", j + 1, b_lport->name, ++ get_lport_type_str(b_lport->type)); ++ } ++ free(c_nodes); ++ } ++ shash_destroy(&child_lports); ++ } ++ ++ ds_put_cstr(out_data, "----------------------------------------\n"); ++ } + ++ free(nodes); + } + static bool - 
is_lport_vif(const struct sbrec_port_binding *pb) +@@ -703,12 +871,6 @@ is_lport_vif(const struct sbrec_port_binding *pb) + return !pb->type[0]; + } + +-static bool +-is_lport_container(const struct sbrec_port_binding *pb) +-{ +- return is_lport_vif(pb) && pb->parent_port && pb->parent_port[0]; +-} +- + static struct tracked_binding_datapath * + tracked_binding_datapath_create(const struct sbrec_datapath_binding *dp, + bool is_new, +@@ -777,26 +939,13 @@ binding_tracked_dp_destroy(struct hmap *tracked_datapaths) + hmap_destroy(tracked_datapaths); + } + +-/* Corresponds to each Port_Binding.type. */ +-enum en_lport_type { +- LP_UNKNOWN, +- LP_VIF, +- LP_PATCH, +- LP_L3GATEWAY, +- LP_LOCALNET, +- LP_LOCALPORT, +- LP_L2GATEWAY, +- LP_VTEP, +- LP_CHASSISREDIRECT, +- LP_VIRTUAL, +- LP_EXTERNAL, +- LP_REMOTE +-}; +- + static enum en_lport_type + get_lport_type(const struct sbrec_port_binding *pb) { -@@ -823,15 +864,52 @@ get_lport_type(const struct sbrec_port_binding *pb) + if (is_lport_vif(pb)) { ++ if (pb->parent_port && pb->parent_port[0]) { ++ return LP_CONTAINER; ++ } + return LP_VIF; + } else if (!strcmp(pb->type, "patch")) { + return LP_PATCH; +@@ -823,15 +972,88 @@ get_lport_type(const struct sbrec_port_binding *pb) return LP_UNKNOWN; } ++static char * ++get_lport_type_str(enum en_lport_type lport_type) ++{ ++ switch (lport_type) { ++ case LP_VIF: ++ return "VIF"; ++ case LP_CONTAINER: ++ return "CONTAINER"; ++ case LP_VIRTUAL: ++ return "VIRTUAL"; ++ case LP_PATCH: ++ return "PATCH"; ++ case LP_CHASSISREDIRECT: ++ return "CHASSISREDIRECT"; ++ case LP_L3GATEWAY: ++ return "L3GATEWAT"; ++ case LP_LOCALNET: ++ return "PATCH"; ++ case LP_LOCALPORT: ++ return "LOCALPORT"; ++ case LP_L2GATEWAY: ++ return "L2GATEWAY"; ++ case LP_EXTERNAL: ++ return "EXTERNAL"; ++ case LP_REMOTE: ++ return "REMOTE"; ++ case LP_VTEP: ++ return "VTEP"; ++ case LP_UNKNOWN: ++ return "UNKNOWN"; ++ } ++ ++ OVS_NOT_REACHED(); ++} ++ +/* For newly claimed ports, if 'notify_up' is 'false': + * 
- set the 'pb.up' field to true if 'pb' has no 'parent_pb'. + * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for @@ -508,7 +990,7 @@ index cb60c5d67..4e6c75696 100644 +claimed_lport_set_up(const struct sbrec_port_binding *pb, + const struct sbrec_port_binding *parent_pb, + const struct sbrec_chassis *chassis_rec, -+ bool notify_up) ++ bool notify_up, struct if_status_mgr *if_mgr) +{ + if (!notify_up) { + bool up = true; @@ -519,7 +1001,7 @@ index cb60c5d67..4e6c75696 100644 + } + + if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) { -+ binding_iface_bound_add(pb->logical_port); ++ if_status_mgr_claim_iface(if_mgr, pb->logical_port); + } +} + @@ -533,89 +1015,726 @@ index cb60c5d67..4e6c75696 100644 const struct ovsrec_interface *iface_rec, - bool sb_readonly, struct hmap *tracked_datapaths) + bool sb_readonly, bool notify_up, -+ struct hmap *tracked_datapaths) ++ struct hmap *tracked_datapaths, ++ struct if_status_mgr *if_mgr) { + if (!sb_readonly) { -+ claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up); ++ claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr); + } + if (pb->chassis != chassis_rec) { if (sb_readonly) { return false; -@@ -900,7 +978,12 @@ release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, - sbrec_port_binding_set_virtual_parent(pb, NULL); +@@ -877,7 +1099,7 @@ claim_lport(const struct sbrec_port_binding *pb, + */ + static bool + release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, +- struct hmap *tracked_datapaths) ++ struct hmap *tracked_datapaths, struct if_status_mgr *if_mgr) + { + if (pb->encap) { + if (sb_readonly) { +@@ -901,6 +1123,7 @@ release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, } -+ if (pb->n_up) { -+ bool up = false; -+ sbrec_port_binding_set_up(pb, &up, 1); -+ } update_lport_tracking(pb, tracked_datapaths); -+ binding_iface_released_add(pb->logical_port); ++ if_status_mgr_release_iface(if_mgr, pb->logical_port); 
VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port); return true; } -@@ -958,8 +1041,7 @@ release_local_binding_children(const struct sbrec_chassis *chassis_rec, - } +@@ -908,14 +1131,15 @@ release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, + static bool + is_lbinding_set(struct local_binding *lbinding) + { +- return lbinding && lbinding->pb && lbinding->iface; ++ return lbinding && lbinding->iface; + } + + static bool +-is_lbinding_this_chassis(struct local_binding *lbinding, +- const struct sbrec_chassis *chassis) ++is_binding_lport_this_chassis(struct binding_lport *b_lport, ++ const struct sbrec_chassis *chassis) + { +- return lbinding && lbinding->pb && lbinding->pb->chassis == chassis; ++ return (b_lport && b_lport->pb && chassis && ++ b_lport->pb->chassis == chassis); + } + + static bool +@@ -927,15 +1151,14 @@ can_bind_on_this_chassis(const struct sbrec_chassis *chassis_rec, + || !strcmp(requested_chassis, chassis_rec->hostname); + } + +-/* Returns 'true' if the 'lbinding' has children of type BT_CONTAINER, ++/* Returns 'true' if the 'lbinding' has binding lports of type LP_CONTAINER, + * 'false' otherwise. 
*/ + static bool + is_lbinding_container_parent(struct local_binding *lbinding) + { +- struct shash_node *node; +- SHASH_FOR_EACH (node, &lbinding->children) { +- struct local_binding *l = node->data; +- if (l->type == BT_CONTAINER) { ++ struct binding_lport *b_lport; ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ if (b_lport->type == LP_CONTAINER) { + return true; } + } +@@ -944,63 +1167,44 @@ is_lbinding_container_parent(struct local_binding *lbinding) + } + static bool +-release_local_binding_children(const struct sbrec_chassis *chassis_rec, +- struct local_binding *lbinding, +- bool sb_readonly, +- struct hmap *tracked_dp_bindings) +-{ +- struct shash_node *node; +- SHASH_FOR_EACH (node, &lbinding->children) { +- struct local_binding *l = node->data; +- if (is_lbinding_this_chassis(l, chassis_rec)) { +- if (!release_lport(l->pb, sb_readonly, tracked_dp_bindings)) { +- return false; +- } ++release_binding_lport(const struct sbrec_chassis *chassis_rec, ++ struct binding_lport *b_lport, bool sb_readonly, ++ struct binding_ctx_out *b_ctx_out) ++{ ++ if (is_binding_lport_this_chassis(b_lport, chassis_rec)) { ++ remove_local_lport_ids(b_lport->pb, b_ctx_out); ++ if (!release_lport(b_lport->pb, sb_readonly, ++ b_ctx_out->tracked_dp_bindings, ++ b_ctx_out->if_mgr)) { ++ return false; + } +- - /* Clear the local bindings' 'pb' and 'iface'. */ - l->pb = NULL; -+ /* Clear the local bindings' 'iface'. 
*/ - l->iface = NULL; +- l->iface = NULL; ++ binding_lport_set_down(b_lport, sb_readonly); } -@@ -998,8 +1080,12 @@ consider_vif_lport_(const struct sbrec_port_binding *pb, + return true; + } + +-static bool +-release_local_binding(const struct sbrec_chassis *chassis_rec, +- struct local_binding *lbinding, bool sb_readonly, +- struct hmap *tracked_dp_bindings) +-{ +- if (!release_local_binding_children(chassis_rec, lbinding, +- sb_readonly, tracked_dp_bindings)) { +- return false; +- } +- +- bool retval = true; +- if (is_lbinding_this_chassis(lbinding, chassis_rec)) { +- retval = release_lport(lbinding->pb, sb_readonly, tracked_dp_bindings); +- } +- +- lbinding->pb = NULL; +- lbinding->iface = NULL; +- return retval; +-} +- + static bool + consider_vif_lport_(const struct sbrec_port_binding *pb, + bool can_bind, const char *vif_chassis, + struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out, +- struct local_binding *lbinding, ++ struct binding_lport *b_lport, + struct hmap *qos_map) + { +- bool lbinding_set = is_lbinding_set(lbinding); ++ bool lbinding_set = b_lport && is_lbinding_set(b_lport->lbinding); ++ if (lbinding_set) { if (can_bind) { /* We can claim the lport. */ - if (!claim_lport(pb, b_ctx_in->chassis_rec, lbinding->iface, -- !b_ctx_in->ovnsb_idl_txn, + const struct sbrec_port_binding *parent_pb = -+ lbinding->parent ? 
lbinding->parent->pb : NULL; ++ binding_lport_get_parent_pb(b_lport); + + if (!claim_lport(pb, parent_pb, b_ctx_in->chassis_rec, -+ lbinding->iface, !b_ctx_in->ovnsb_idl_txn, -+ !lbinding->parent, - b_ctx_out->tracked_dp_bindings)){ ++ b_lport->lbinding->iface, + !b_ctx_in->ovnsb_idl_txn, +- b_ctx_out->tracked_dp_bindings)){ ++ !parent_pb, b_ctx_out->tracked_dp_bindings, ++ b_ctx_out->if_mgr)){ return false; } -@@ -1203,8 +1289,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb, + +@@ -1012,7 +1216,7 @@ consider_vif_lport_(const struct sbrec_port_binding *pb, + b_ctx_out->tracked_dp_bindings); + update_local_lport_ids(pb, b_ctx_out); + update_local_lports(pb->logical_port, b_ctx_out); +- if (lbinding->iface && qos_map && b_ctx_in->ovs_idl_txn) { ++ if (b_lport->lbinding->iface && qos_map && b_ctx_in->ovs_idl_txn) { + get_qos_params(pb, qos_map); + } + } else { +@@ -1031,7 +1235,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb, + /* Release the lport if there is no lbinding. 
*/ + if (!lbinding_set || !can_bind) { + return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, +- b_ctx_out->tracked_dp_bindings); ++ b_ctx_out->tracked_dp_bindings, ++ b_ctx_out->if_mgr); + } + } + +@@ -1050,16 +1255,19 @@ consider_vif_lport(const struct sbrec_port_binding *pb, + vif_chassis); + + if (!lbinding) { +- lbinding = local_binding_find(b_ctx_out->local_bindings, ++ lbinding = local_binding_find(&b_ctx_out->lbinding_data->bindings, + pb->logical_port); + } + ++ struct binding_lport *b_lport = NULL; + if (lbinding) { +- lbinding->pb = pb; ++ struct shash *binding_lports = ++ &b_ctx_out->lbinding_data->lports; ++ b_lport = local_binding_add_lport(binding_lports, lbinding, pb, LP_VIF); + } + + return consider_vif_lport_(pb, can_bind, vif_chassis, b_ctx_in, +- b_ctx_out, lbinding, qos_map); ++ b_ctx_out, b_lport, qos_map); + } + + static bool +@@ -1068,9 +1276,9 @@ consider_container_lport(const struct sbrec_port_binding *pb, + struct binding_ctx_out *b_ctx_out, + struct hmap *qos_map) + { ++ struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; + struct local_binding *parent_lbinding; +- parent_lbinding = local_binding_find(b_ctx_out->local_bindings, +- pb->parent_port); ++ parent_lbinding = local_binding_find(local_bindings, pb->parent_port); + + if (!parent_lbinding) { + /* There is no local_binding for parent port. Create it +@@ -1085,54 +1293,62 @@ consider_container_lport(const struct sbrec_port_binding *pb, + * we want the these container ports also be claimed by the + * chassis. 
+ * */ +- parent_lbinding = local_binding_create(pb->parent_port, NULL, NULL, +- BT_VIF); +- local_binding_add(b_ctx_out->local_bindings, parent_lbinding); ++ parent_lbinding = local_binding_create(pb->parent_port, NULL); ++ local_binding_add(local_bindings, parent_lbinding); + } + +- struct local_binding *container_lbinding = +- local_binding_find_child(parent_lbinding, pb->logical_port); ++ struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; ++ struct binding_lport *container_b_lport = ++ local_binding_add_lport(binding_lports, parent_lbinding, pb, ++ LP_CONTAINER); + +- if (!container_lbinding) { +- container_lbinding = local_binding_create(pb->logical_port, +- parent_lbinding->iface, +- pb, BT_CONTAINER); +- local_binding_add_child(parent_lbinding, container_lbinding); +- } else { +- ovs_assert(container_lbinding->type == BT_CONTAINER); +- container_lbinding->pb = pb; +- container_lbinding->iface = parent_lbinding->iface; +- } ++ struct binding_lport *parent_b_lport = ++ binding_lport_find(binding_lports, pb->parent_port); + +- if (!parent_lbinding->pb) { +- parent_lbinding->pb = lport_lookup_by_name( ++ bool can_consider_c_lport = true; ++ if (!parent_b_lport || !parent_b_lport->pb) { ++ const struct sbrec_port_binding *parent_pb = lport_lookup_by_name( + b_ctx_in->sbrec_port_binding_by_name, pb->parent_port); + +- if (parent_lbinding->pb) { ++ if (parent_pb && get_lport_type(parent_pb) == LP_VIF) { + /* Its possible that the parent lport is not considered yet. + * So call consider_vif_lport() to process it first. */ +- consider_vif_lport(parent_lbinding->pb, b_ctx_in, b_ctx_out, ++ consider_vif_lport(parent_pb, b_ctx_in, b_ctx_out, + parent_lbinding, qos_map); ++ parent_b_lport = binding_lport_find(binding_lports, ++ pb->parent_port); + } else { +- /* The parent lport doesn't exist. Call release_lport() to +- * release the container lport, if it was bound earlier. 
*/ +- if (is_lbinding_this_chassis(container_lbinding, +- b_ctx_in->chassis_rec)) { +- return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, +- b_ctx_out->tracked_dp_bindings); +- } ++ /* The parent lport doesn't exist. Cannot consider the container ++ * lport for binding. */ ++ can_consider_c_lport = false; ++ } ++ } + +- return true; ++ if (parent_b_lport && parent_b_lport->type != LP_VIF) { ++ can_consider_c_lport = false; ++ } ++ ++ if (!can_consider_c_lport) { ++ /* Call release_lport() to release the container lport, ++ * if it was bound earlier. */ ++ if (is_binding_lport_this_chassis(container_b_lport, ++ b_ctx_in->chassis_rec)) { ++ return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, ++ b_ctx_out->tracked_dp_bindings, ++ b_ctx_out->if_mgr); + } ++ ++ return true; + } + +- const char *vif_chassis = smap_get(&parent_lbinding->pb->options, ++ ovs_assert(parent_b_lport && parent_b_lport->pb); ++ const char *vif_chassis = smap_get(&parent_b_lport->pb->options, + "requested-chassis"); + bool can_bind = can_bind_on_this_chassis(b_ctx_in->chassis_rec, + vif_chassis); + + return consider_vif_lport_(pb, can_bind, vif_chassis, b_ctx_in, b_ctx_out, +- container_lbinding, qos_map); ++ container_b_lport, qos_map); + } + + static bool +@@ -1141,46 +1357,58 @@ consider_virtual_lport(const struct sbrec_port_binding *pb, + struct binding_ctx_out *b_ctx_out, + struct hmap *qos_map) + { +- struct local_binding * parent_lbinding = +- pb->virtual_parent ? local_binding_find(b_ctx_out->local_bindings, ++ struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; ++ struct local_binding *parent_lbinding = ++ pb->virtual_parent ? local_binding_find(local_bindings, + pb->virtual_parent) + : NULL; + +- if (parent_lbinding && !parent_lbinding->pb) { +- parent_lbinding->pb = lport_lookup_by_name( +- b_ctx_in->sbrec_port_binding_by_name, pb->virtual_parent); +- +- if (parent_lbinding->pb) { +- /* Its possible that the parent lport is not considered yet. 
+- * So call consider_vif_lport() to process it first. */ +- consider_vif_lport(parent_lbinding->pb, b_ctx_in, b_ctx_out, +- parent_lbinding, qos_map); +- } +- } +- ++ struct binding_lport *virtual_b_lport = NULL; + /* Unlike container lports, we don't have to create parent_lbinding if + * it is NULL. This is because, if parent_lbinding is not present, it + * means the virtual port can't bind in this chassis. + * Note: pinctrl module binds the virtual lport when it sees ARP + * packet from the parent lport. */ +- struct local_binding *virtual_lbinding = NULL; +- if (is_lbinding_this_chassis(parent_lbinding, b_ctx_in->chassis_rec)) { +- virtual_lbinding = +- local_binding_find_child(parent_lbinding, pb->logical_port); +- if (!virtual_lbinding) { +- virtual_lbinding = local_binding_create(pb->logical_port, +- parent_lbinding->iface, +- pb, BT_VIRTUAL); +- local_binding_add_child(parent_lbinding, virtual_lbinding); +- } else { +- ovs_assert(virtual_lbinding->type == BT_VIRTUAL); +- virtual_lbinding->pb = pb; +- virtual_lbinding->iface = parent_lbinding->iface; ++ if (parent_lbinding) { ++ struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; ++ ++ struct binding_lport *parent_b_lport = ++ binding_lport_find(binding_lports, pb->virtual_parent); ++ ++ if (!parent_b_lport || !parent_b_lport->pb) { ++ const struct sbrec_port_binding *parent_pb = lport_lookup_by_name( ++ b_ctx_in->sbrec_port_binding_by_name, pb->virtual_parent); ++ ++ if (parent_pb && get_lport_type(parent_pb) == LP_VIF) { ++ /* Its possible that the parent lport is not considered yet. ++ * So call consider_vif_lport() to process it first. 
*/ ++ consider_vif_lport(parent_pb, b_ctx_in, b_ctx_out, ++ parent_lbinding, qos_map); ++ } ++ } ++ ++ parent_b_lport = local_binding_get_primary_lport(parent_lbinding); ++ if (is_binding_lport_this_chassis(parent_b_lport, ++ b_ctx_in->chassis_rec)) { ++ virtual_b_lport = ++ local_binding_add_lport(binding_lports, parent_lbinding, pb, ++ LP_VIRTUAL); + } + } + +- return consider_vif_lport_(pb, true, NULL, b_ctx_in, b_ctx_out, +- virtual_lbinding, qos_map); ++ if (!consider_vif_lport_(pb, true, NULL, b_ctx_in, b_ctx_out, ++ virtual_b_lport, qos_map)) { ++ return false; ++ } ++ ++ /* If the virtual lport is not bound to this chassis, then remove ++ * its entry from the local_lport_ids if present. This is required ++ * when a virtual port moves from one chassis to other.*/ ++ if (!virtual_b_lport) { ++ remove_local_lport_ids(pb, b_ctx_out); ++ } ++ ++ return true; + } + + /* Considers either claiming the lport or releasing the lport +@@ -1203,12 +1431,14 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb, b_ctx_out->tracked_dp_bindings); update_local_lport_ids(pb, b_ctx_out); - return claim_lport(pb, b_ctx_in->chassis_rec, NULL, - !b_ctx_in->ovnsb_idl_txn, +- b_ctx_out->tracked_dp_bindings); + return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL, + !b_ctx_in->ovnsb_idl_txn, false, - b_ctx_out->tracked_dp_bindings); ++ b_ctx_out->tracked_dp_bindings, ++ b_ctx_out->if_mgr); } else if (pb->chassis == b_ctx_in->chassis_rec) { return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, -@@ -2063,6 +2149,16 @@ handle_deleted_vif_lport(const struct sbrec_port_binding *pb, - * when the interface change happens. 
*/ - if (is_lport_container(pb)) { - remove_local_lports(pb->logical_port, b_ctx_out); +- b_ctx_out->tracked_dp_bindings); ++ b_ctx_out->tracked_dp_bindings, ++ b_ctx_out->if_mgr); + } + + return true; +@@ -1321,6 +1551,8 @@ build_local_bindings(struct binding_ctx_in *b_ctx_in, + continue; + } + ++ struct shash *local_bindings = ++ &b_ctx_out->lbinding_data->bindings; + for (j = 0; j < port_rec->n_interfaces; j++) { + const struct ovsrec_interface *iface_rec; + +@@ -1330,11 +1562,10 @@ build_local_bindings(struct binding_ctx_in *b_ctx_in, + + if (iface_id && ofport > 0) { + struct local_binding *lbinding = +- local_binding_find(b_ctx_out->local_bindings, iface_id); ++ local_binding_find(local_bindings, iface_id); + if (!lbinding) { +- lbinding = local_binding_create(iface_id, iface_rec, NULL, +- BT_VIF); +- local_binding_add(b_ctx_out->local_bindings, lbinding); ++ lbinding = local_binding_create(iface_id, iface_rec); ++ local_binding_add(local_bindings, lbinding); + } else { + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(1, 5); +@@ -1345,7 +1576,6 @@ build_local_bindings(struct binding_ctx_in *b_ctx_in, + "configuration on interface [%s]", + lbinding->iface->name, iface_rec->name, + iface_rec->name); +- ovs_assert(lbinding->type == BT_VIF); + } + + update_local_lports(iface_id, b_ctx_out); +@@ -1408,11 +1638,11 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) + break; + + case LP_VIF: +- if (is_lport_container(pb)) { +- consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr); +- } else { +- consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map_ptr); +- } ++ consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map_ptr); ++ break; + -+ /* If the container port is removed we should also remove it from -+ * its parent's children set. 
-+ */ -+ if (lbinding) { -+ if (lbinding->parent) { -+ local_binding_delete_child(lbinding->parent, lbinding); -+ } -+ local_binding_destroy(lbinding); -+ } ++ case LP_CONTAINER: ++ consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr); + break; + + case LP_VIRTUAL: +@@ -1713,39 +1943,44 @@ consider_iface_claim(const struct ovsrec_interface *iface_rec, + update_local_lports(iface_id, b_ctx_out); + smap_replace(b_ctx_out->local_iface_ids, iface_rec->name, iface_id); + +- struct local_binding *lbinding = +- local_binding_find(b_ctx_out->local_bindings, iface_id); ++ struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; ++ struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; ++ struct local_binding *lbinding = local_binding_find(local_bindings, ++ iface_id); + + if (!lbinding) { +- lbinding = local_binding_create(iface_id, iface_rec, NULL, BT_VIF); +- local_binding_add(b_ctx_out->local_bindings, lbinding); ++ lbinding = local_binding_create(iface_id, iface_rec); ++ local_binding_add(local_bindings, lbinding); + } else { + lbinding->iface = iface_rec; } - handle_deleted_lport(pb, b_ctx_in, b_ctx_out); -@@ -2132,13 +2228,26 @@ bool - binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, - struct binding_ctx_out *b_ctx_out) - { -- bool handled = true; -+ /* Run the tracked port binding loop twice to ensure correctness: -+ * 1. First to handle deleted changes. 
This is split in four sub-parts -+ * because child local bindings must be cleaned up first: +- if (!lbinding->pb || strcmp(lbinding->name, lbinding->pb->logical_port)) { +- lbinding->pb = lport_lookup_by_name( +- b_ctx_in->sbrec_port_binding_by_name, lbinding->name); +- if (lbinding->pb && !strcmp(lbinding->pb->type, "virtual")) { +- lbinding->pb = NULL; ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); ++ const struct sbrec_port_binding *pb = NULL; ++ if (!b_lport) { ++ pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, ++ lbinding->name); ++ if (pb && get_lport_type(pb) == LP_VIF) { ++ b_lport = local_binding_add_lport(binding_lports, lbinding, pb, ++ LP_VIF); + } + } + +- if (lbinding->pb) { +- if (!consider_vif_lport(lbinding->pb, b_ctx_in, b_ctx_out, +- lbinding, qos_map)) { +- return false; +- } ++ if (!b_lport) { ++ /* There is no binding lport for this local binding. */ ++ return true; ++ } ++ ++ if (!consider_vif_lport(b_lport->pb, b_ctx_in, b_ctx_out, ++ lbinding, qos_map)) { ++ return false; + } + + /* Update the child local_binding's iface (if any children) and try to + * claim the container lbindings. 
*/ +- struct shash_node *node; +- SHASH_FOR_EACH (node, &lbinding->children) { +- struct local_binding *child = node->data; +- child->iface = iface_rec; +- if (child->type == BT_CONTAINER) { +- if (!consider_container_lport(child->pb, b_ctx_in, b_ctx_out, ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ if (b_lport->type == LP_CONTAINER) { ++ if (!consider_container_lport(b_lport->pb, b_ctx_in, b_ctx_out, + qos_map)) { + return false; + } +@@ -1776,32 +2011,43 @@ consider_iface_release(const struct ovsrec_interface *iface_rec, + struct binding_ctx_out *b_ctx_out) + { + struct local_binding *lbinding; +- lbinding = local_binding_find(b_ctx_out->local_bindings, +- iface_id); +- if (is_lbinding_this_chassis(lbinding, b_ctx_in->chassis_rec)) { ++ struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; ++ struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; ++ ++ lbinding = local_binding_find(local_bindings, iface_id); ++ struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); ++ if (is_binding_lport_this_chassis(b_lport, b_ctx_in->chassis_rec)) { + struct local_datapath *ld = + get_local_datapath(b_ctx_out->local_datapaths, +- lbinding->pb->datapath->tunnel_key); ++ b_lport->pb->datapath->tunnel_key); + if (ld) { +- remove_pb_from_local_datapath(lbinding->pb, +- b_ctx_in->chassis_rec, +- b_ctx_out, ld); ++ remove_pb_from_local_datapath(b_lport->pb, ++ b_ctx_in->chassis_rec, ++ b_ctx_out, ld); + } + +- /* Note: release_local_binding() resets lbinding->pb and +- * lbinding->iface. +- * Cannot access these members of lbinding after this call. */ +- if (!release_local_binding(b_ctx_in->chassis_rec, lbinding, +- !b_ctx_in->ovnsb_idl_txn, +- b_ctx_out->tracked_dp_bindings)) { +- return false; ++ /* Release the primary binding lport and other children lports if ++ * any. 
*/ ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ if (!release_binding_lport(b_ctx_in->chassis_rec, b_lport, ++ !b_ctx_in->ovnsb_idl_txn, ++ b_ctx_out)) { ++ return false; ++ } + } ++ ++ } ++ ++ if (lbinding) { ++ /* Clear the iface of the local binding. */ ++ lbinding->iface = NULL; + } + + /* Check if the lbinding has children of type PB_CONTAINER. + * If so, don't delete the local_binding. */ + if (lbinding && !is_lbinding_container_parent(lbinding)) { +- local_binding_delete(b_ctx_out->local_bindings, lbinding); ++ local_binding_delete(lbinding, local_bindings, binding_lports, ++ b_ctx_out->if_mgr); + } + + remove_local_lports(iface_id, b_ctx_out); +@@ -2002,56 +2248,35 @@ handle_deleted_lport(const struct sbrec_port_binding *pb, + } + } + +-static struct local_binding * +-get_lbinding_for_lport(const struct sbrec_port_binding *pb, +- enum en_lport_type lport_type, +- struct binding_ctx_out *b_ctx_out) +-{ +- ovs_assert(lport_type == LP_VIF || lport_type == LP_VIRTUAL); +- +- if (lport_type == LP_VIF && !is_lport_container(pb)) { +- return local_binding_find(b_ctx_out->local_bindings, pb->logical_port); +- } +- +- struct local_binding *parent_lbinding = NULL; +- +- if (lport_type == LP_VIRTUAL) { +- if (pb->virtual_parent) { +- parent_lbinding = local_binding_find(b_ctx_out->local_bindings, +- pb->virtual_parent); +- } +- } else { +- if (pb->parent_port) { +- parent_lbinding = local_binding_find(b_ctx_out->local_bindings, +- pb->parent_port); +- } +- } +- +- return parent_lbinding +- ? 
local_binding_find(&parent_lbinding->children, pb->logical_port) +- : NULL; +-} +- + static bool + handle_deleted_vif_lport(const struct sbrec_port_binding *pb, + enum en_lport_type lport_type, + struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out) + { +- struct local_binding *lbinding = +- get_lbinding_for_lport(pb, lport_type, b_ctx_out); ++ struct local_binding *lbinding = NULL; ++ bool bound = false; + +- if (lbinding) { +- lbinding->pb = NULL; +- /* The port_binding 'pb' is deleted. So there is no need to +- * clear the 'chassis' column of 'pb'. But we need to do +- * for the local_binding's children. */ +- if (lbinding->type == BT_VIF && +- !release_local_binding_children( +- b_ctx_in->chassis_rec, lbinding, +- !b_ctx_in->ovnsb_idl_txn, +- b_ctx_out->tracked_dp_bindings)) { +- return false; ++ struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; ++ struct binding_lport *b_lport = binding_lport_find(binding_lports, pb->logical_port); ++ if (b_lport) { ++ lbinding = b_lport->lbinding; ++ bound = is_binding_lport_this_chassis(b_lport, b_ctx_in->chassis_rec); ++ ++ /* Remove b_lport from local_binding. */ ++ binding_lport_delete(binding_lports, b_lport); ++ } ++ ++ if (bound && lbinding && lport_type == LP_VIF) { ++ /* We need to release the container/virtual binding lports (if any) if ++ * deleted 'pb' type is LP_VIF. */ ++ struct binding_lport *c_lport; ++ LIST_FOR_EACH (c_lport, list_node, &lbinding->binding_lports) { ++ if (!release_binding_lport(b_ctx_in->chassis_rec, c_lport, ++ !b_ctx_in->ovnsb_idl_txn, ++ b_ctx_out)) { ++ return false; ++ } + } + } + +@@ -2061,7 +2286,7 @@ handle_deleted_vif_lport(const struct sbrec_port_binding *pb, + * it from local_lports if there is a VIF entry. + * consider_iface_release() takes care of removing from the local_lports + * when the interface change happens. 
*/ +- if (is_lport_container(pb)) { ++ if (lport_type == LP_CONTAINER) { + remove_local_lports(pb->logical_port, b_ctx_out); + } + +@@ -2081,7 +2306,7 @@ handle_updated_vif_lport(const struct sbrec_port_binding *pb, + + if (lport_type == LP_VIRTUAL) { + handled = consider_virtual_lport(pb, b_ctx_in, b_ctx_out, qos_map); +- } else if (lport_type == LP_VIF && is_lport_container(pb)) { ++ } else if (lport_type == LP_CONTAINER) { + handled = consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map); + } else { + handled = consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map); +@@ -2093,14 +2318,14 @@ handle_updated_vif_lport(const struct sbrec_port_binding *pb, + + bool now_claimed = (pb->chassis == b_ctx_in->chassis_rec); + +- if (lport_type == LP_VIRTUAL || +- (lport_type == LP_VIF && is_lport_container(pb)) || ++ if (lport_type == LP_VIRTUAL || lport_type == LP_CONTAINER || + claimed == now_claimed) { + return true; + } + +- struct local_binding *lbinding = +- local_binding_find(b_ctx_out->local_bindings, pb->logical_port); ++ struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; ++ struct local_binding *lbinding = local_binding_find(local_bindings, ++ pb->logical_port); + + /* If the ovs port backing this binding previously was removed in the + * meantime, we won't have a local_binding for it. 
+@@ -2110,12 +2335,11 @@ handle_updated_vif_lport(const struct sbrec_port_binding *pb, + return true; + } + +- struct shash_node *node; +- SHASH_FOR_EACH (node, &lbinding->children) { +- struct local_binding *child = node->data; +- if (child->type == BT_CONTAINER) { +- handled = consider_container_lport(child->pb, b_ctx_in, b_ctx_out, +- qos_map); ++ struct binding_lport *b_lport; ++ LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { ++ if (b_lport->type == LP_CONTAINER) { ++ handled = consider_container_lport(b_lport->pb, b_ctx_in, ++ b_ctx_out, qos_map); + if (!handled) { + return false; + } +@@ -2132,13 +2356,26 @@ bool + binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out) + { +- bool handled = true; ++ /* Run the tracked port binding loop twice to ensure correctness: ++ * 1. First to handle deleted changes. This is split in four sub-parts ++ * because child local bindings must be cleaned up first: + * a. Container ports first. + * b. Then virtual ports. + * c. Then regular VIFs. 
@@ -640,7 +1759,7 @@ index cb60c5d67..4e6c75696 100644 SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (pb, b_ctx_in->port_binding_table) { if (!sbrec_port_binding_is_deleted(pb)) { -@@ -2146,18 +2255,60 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, +@@ -2146,18 +2383,73 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, } enum en_lport_type lport_type = get_lport_type(pb); @@ -648,12 +1767,25 @@ index cb60c5d67..4e6c75696 100644 - handled = handle_deleted_vif_lport(pb, lport_type, b_ctx_in, - b_ctx_out); + -+ if (lport_type == LP_VIF) { -+ if (is_lport_container(pb)) { -+ shash_add(&deleted_container_pbs, pb->logical_port, pb); -+ } else { -+ shash_add(&deleted_vif_pbs, pb->logical_port, pb); ++ struct binding_lport *b_lport = ++ binding_lport_find(&b_ctx_out->lbinding_data->lports, ++ pb->logical_port); ++ if (b_lport) { ++ /* If the 'b_lport->type' and 'lport_type' don't match, then update ++ * the b_lport->type to the updated 'lport_type'. The function ++ * binding_lport_check_and_cleanup() will cleanup the 'b_lport' ++ * if required. 
*/ ++ if (b_lport->type != lport_type) { ++ b_lport->type = lport_type; + } ++ b_lport = binding_lport_check_and_cleanup( ++ b_lport, &b_ctx_out->lbinding_data->lports); ++ } ++ ++ if (lport_type == LP_VIF) { ++ shash_add(&deleted_vif_pbs, pb->logical_port, pb); ++ } else if (lport_type == LP_CONTAINER) { ++ shash_add(&deleted_container_pbs, pb->logical_port, pb); + } else if (lport_type == LP_VIRTUAL) { + shash_add(&deleted_virtual_pbs, pb->logical_port, pb); } else { @@ -665,24 +1797,24 @@ index cb60c5d67..4e6c75696 100644 + struct shash_node *node; + struct shash_node *node_next; + SHASH_FOR_EACH_SAFE (node, node_next, &deleted_container_pbs) { -+ handled = handle_deleted_vif_lport(node->data, LP_VIF, b_ctx_in, ++ handled = handle_deleted_vif_lport(node->data, LP_CONTAINER, b_ctx_in, + b_ctx_out); + shash_delete(&deleted_container_pbs, node); if (!handled) { - break; + goto delete_done; -+ } -+ } -+ + } + } + + SHASH_FOR_EACH_SAFE (node, node_next, &deleted_virtual_pbs) { + handled = handle_deleted_vif_lport(node->data, LP_VIRTUAL, b_ctx_in, + b_ctx_out); + shash_delete(&deleted_virtual_pbs, node); + if (!handled) { + goto delete_done; - } - } - ++ } ++ } ++ + SHASH_FOR_EACH_SAFE (node, node_next, &deleted_vif_pbs) { + handled = handle_deleted_vif_lport(node->data, LP_VIF, b_ctx_in, + b_ctx_out); @@ -706,186 +1838,452 @@ index cb60c5d67..4e6c75696 100644 if (!handled) { return false; } -@@ -2288,3 +2439,155 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, +@@ -2175,12 +2467,33 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, + + enum en_lport_type lport_type = get_lport_type(pb); + ++ struct binding_lport *b_lport = ++ binding_lport_find(&b_ctx_out->lbinding_data->lports, ++ pb->logical_port); ++ if (b_lport) { ++ ovs_assert(b_lport->pb == pb); ++ ++ if (b_lport->type != lport_type) { ++ b_lport->type = lport_type; ++ } ++ ++ if (b_lport->lbinding) { ++ handled = local_binding_handle_stale_binding_lports( ++ 
b_lport->lbinding, b_ctx_in, b_ctx_out, qos_map_ptr); ++ if (!handled) { ++ /* Backout from the handling. */ ++ break; ++ } ++ } ++ } ++ + struct local_datapath *ld = + get_local_datapath(b_ctx_out->local_datapaths, + pb->datapath->tunnel_key); + + switch (lport_type) { + case LP_VIF: ++ case LP_CONTAINER: + case LP_VIRTUAL: + handled = handle_updated_vif_lport(pb, lport_type, b_ctx_in, + b_ctx_out, qos_map_ptr); +@@ -2288,3 +2601,328 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, destroy_qos_map(&qos_map); return handled; } + -+/* Registered ofctrl seqno type for port_binding flow installation. */ -+static size_t binding_seq_type_pb_cfg; ++/* Static functions for local_binding and binding_lport. */ ++static struct local_binding * ++local_binding_create(const char *name, const struct ovsrec_interface *iface) ++{ ++ struct local_binding *lbinding = xzalloc(sizeof *lbinding); ++ lbinding->name = xstrdup(name); ++ lbinding->iface = iface; ++ ovs_list_init(&lbinding->binding_lports); + -+/* Binding specific seqno to be acked by ofctrl when flows for new interfaces -+ * have been installed. -+ */ -+static uint32_t binding_iface_seqno = 0; ++ return lbinding; ++} + -+/* Map indexed by iface-id containing the sequence numbers that when acked -+ * indicate that the OVS flows for the iface-id have been installed. -+ */ -+static struct simap binding_iface_seqno_map = -+ SIMAP_INITIALIZER(&binding_iface_seqno_map); ++static struct local_binding * ++local_binding_find(struct shash *local_bindings, const char *name) ++{ ++ return shash_find_data(local_bindings, name); ++} + -+void -+binding_init(void) ++static void ++local_binding_add(struct shash *local_bindings, struct local_binding *lbinding) +{ -+ binding_seq_type_pb_cfg = ofctrl_seqno_add_type(); ++ shash_add(local_bindings, lbinding->name, lbinding); +} + -+/* Processes new release/bind operations OVN ports.
For newly bound ports -+ * it creates ofctrl seqno update requests that will be acked when -+ * corresponding OVS flows have been installed. -+ * -+ * NOTE: Should be called only when valid SB and OVS transactions are -+ * available. ++static void ++local_binding_destroy(struct local_binding *lbinding, ++ struct shash *binding_lports) ++{ ++ struct binding_lport *b_lport; ++ LIST_FOR_EACH_POP (b_lport, list_node, &lbinding->binding_lports) { ++ b_lport->lbinding = NULL; ++ binding_lport_delete(binding_lports, b_lport); ++ } ++ ++ free(lbinding->name); ++ free(lbinding); ++} ++ ++static void ++local_binding_delete(struct local_binding *lbinding, ++ struct shash *local_bindings, ++ struct shash *binding_lports, ++ struct if_status_mgr *if_mgr) ++{ ++ shash_find_and_delete(local_bindings, lbinding->name); ++ if_status_mgr_delete_iface(if_mgr, lbinding->name); ++ local_binding_destroy(lbinding, binding_lports); ++} ++ ++/* Returns the primary binding lport if present in lbinding's ++ * binding lports list. A binding lport is considered primary ++ * if binding lport's type is LP_VIF and the name matches ++ * with the 'lbinding'. + */ -+void -+binding_seqno_run(struct shash *local_bindings) ++static struct binding_lport * ++local_binding_get_primary_lport(struct local_binding *lbinding) +{ -+ const char *iface_id; -+ const char *iface_id_next; ++ if (!lbinding) { ++ return NULL; ++ } + -+ SSET_FOR_EACH_SAFE (iface_id, iface_id_next, &binding_iface_released_set) { -+ struct shash_node *lb_node = shash_find(local_bindings, iface_id); ++ if (!ovs_list_is_empty(&lbinding->binding_lports)) { ++ struct binding_lport *b_lport = NULL; ++ b_lport = CONTAINER_OF(ovs_list_front(&lbinding->binding_lports), ++ struct binding_lport, list_node); + -+ /* If the local binding still exists (i.e., the OVS interface is -+ * still configured locally) then remove the external id and remove -+ * it from the in-flight seqno map. 
-+ */ -+ if (lb_node) { -+ struct local_binding *lb = lb_node->data; ++ if (b_lport->type == LP_VIF && ++ !strcmp(lbinding->name, b_lport->name)) { ++ return b_lport; ++ } ++ } + -+ if (lb->iface && smap_get(&lb->iface->external_ids, -+ OVN_INSTALLED_EXT_ID)) { -+ ovsrec_interface_update_external_ids_delkey( -+ lb->iface, OVN_INSTALLED_EXT_ID); -+ } ++ return NULL; ++} ++ ++static struct binding_lport * ++local_binding_add_lport(struct shash *binding_lports, ++ struct local_binding *lbinding, ++ const struct sbrec_port_binding *pb, ++ enum en_lport_type b_type) ++{ ++ struct binding_lport *b_lport = ++ binding_lport_find(binding_lports, pb->logical_port); ++ bool add_to_lport_list = false; ++ if (!b_lport) { ++ b_lport = binding_lport_create(pb, lbinding, b_type); ++ binding_lport_add(binding_lports, b_lport); ++ add_to_lport_list = true; ++ } else if (b_lport->lbinding != lbinding) { ++ add_to_lport_list = true; ++ if (!ovs_list_is_empty(&b_lport->list_node)) { ++ ovs_list_remove(&b_lport->list_node); + } -+ simap_find_and_delete(&binding_iface_seqno_map, iface_id); -+ sset_delete(&binding_iface_released_set, -+ SSET_NODE_FROM_NAME(iface_id)); ++ b_lport->lbinding = lbinding; ++ b_lport->type = b_type; + } + -+ bool new_ifaces = false; -+ uint32_t new_seqno = binding_iface_seqno + 1; ++ if (add_to_lport_list) { ++ if (b_type == LP_VIF) { ++ ovs_list_push_front(&lbinding->binding_lports, &b_lport->list_node); ++ } else { ++ ovs_list_push_back(&lbinding->binding_lports, &b_lport->list_node); ++ } ++ } + -+ SSET_FOR_EACH_SAFE (iface_id, iface_id_next, &binding_iface_bound_set) { -+ struct shash_node *lb_node = shash_find(local_bindings, iface_id); ++ return b_lport; ++} + -+ struct local_binding *lb = lb_node ? lb_node->data : NULL; ++/* This function handles the stale binding lports of 'lbinding' if 'lbinding' ++ * doesn't have a primary binding lport. 
++ */ ++static bool ++local_binding_handle_stale_binding_lports(struct local_binding *lbinding, ++ struct binding_ctx_in *b_ctx_in, ++ struct binding_ctx_out *b_ctx_out, ++ struct hmap *qos_map) ++{ ++ /* Check if this lbinding has a primary binding_lport or not. */ ++ struct binding_lport *p_lport = local_binding_get_primary_lport(lbinding); ++ if (p_lport) { ++ /* Nothing to be done. */ ++ return true; ++ } + -+ /* Make sure the binding is still complete, i.e., both SB port_binding -+ * and OVS interface still exist. -+ * -+ * If so, then this is a newly bound interface, make sure we reset the -+ * Port_Binding 'up' field and the OVS Interface 'external-id'. -+ */ -+ if (lb && lb->pb && lb->iface) { -+ new_ifaces = true; ++ bool handled = true; ++ struct binding_lport *b_lport, *next; ++ const struct sbrec_port_binding *pb; ++ LIST_FOR_EACH_SAFE (b_lport, next, list_node, &lbinding->binding_lports) { ++ /* Get the lport type again from the pb. It's possible that the ++ * pb type has changed. */ ++ enum en_lport_type pb_lport_type = get_lport_type(b_lport->pb); ++ if (b_lport->type == LP_VIRTUAL && pb_lport_type == LP_VIRTUAL) { ++ pb = b_lport->pb; ++ binding_lport_delete(&b_ctx_out->lbinding_data->lports, ++ b_lport); ++ handled = consider_virtual_lport(pb, b_ctx_in, b_ctx_out, qos_map); ++ } else if (b_lport->type == LP_CONTAINER && ++ pb_lport_type == LP_CONTAINER) { ++ /* For container lport, binding_lport is preserved so that when ++ * the parent port is created, it can be considered. ++ * consider_container_lport() creates the binding_lport for the parent ++ * port (with iface set to NULL). */ ++ handled = consider_container_lport(b_lport->pb, b_ctx_in, b_ctx_out, qos_map); ++ } else { ++ /* This can happen when the lport type changes from one type ++ * to another. Eg. from normal lport to external. Release the ++ * lport if it was claimed earlier and delete the b_lport.
*/ ++ handled = release_binding_lport(b_ctx_in->chassis_rec, b_lport, ++ !b_ctx_in->ovnsb_idl_txn, ++ b_ctx_out); ++ binding_lport_delete(&b_ctx_out->lbinding_data->lports, ++ b_lport); ++ } + -+ if (smap_get(&lb->iface->external_ids, OVN_INSTALLED_EXT_ID)) { -+ ovsrec_interface_update_external_ids_delkey( -+ lb->iface, OVN_INSTALLED_EXT_ID); -+ } -+ if (lb->pb->n_up) { -+ bool up = false; -+ sbrec_port_binding_set_up(lb->pb, &up, 1); -+ } -+ simap_put(&binding_iface_seqno_map, lb->name, new_seqno); ++ if (!handled) { ++ return false; + } -+ sset_delete(&binding_iface_bound_set, SSET_NODE_FROM_NAME(iface_id)); + } + -+ /* Request a seqno update when the flows for new interfaces have been -+ * installed in OVS. -+ */ -+ if (new_ifaces) { -+ binding_iface_seqno = new_seqno; -+ ofctrl_seqno_update_create(binding_seq_type_pb_cfg, new_seqno); ++ return handled; ++} ++ ++static struct binding_lport * ++binding_lport_create(const struct sbrec_port_binding *pb, ++ struct local_binding *lbinding, ++ enum en_lport_type type) ++{ ++ struct binding_lport *b_lport = xzalloc(sizeof *b_lport); ++ b_lport->name = xstrdup(pb->logical_port); ++ b_lport->pb = pb; ++ b_lport->type = type; ++ b_lport->lbinding = lbinding; ++ ovs_list_init(&b_lport->list_node); ++ ++ return b_lport; ++} ++ ++static void ++binding_lport_add(struct shash *binding_lports, struct binding_lport *b_lport) ++{ ++ shash_add(binding_lports, b_lport->pb->logical_port, b_lport); ++} ++ ++static struct binding_lport * ++binding_lport_find(struct shash *binding_lports, const char *lport_name) ++{ ++ if (!lport_name) { ++ return NULL; ++ } ++ ++ return shash_find_data(binding_lports, lport_name); ++} ++ ++static void ++binding_lport_destroy(struct binding_lport *b_lport) ++{ ++ if (!ovs_list_is_empty(&b_lport->list_node)) { ++ ovs_list_remove(&b_lport->list_node); ++ } ++ ++ free(b_lport->name); ++ free(b_lport); ++} ++ ++static void ++binding_lport_delete(struct shash *binding_lports, ++ struct binding_lport 
*b_lport) ++{ ++ shash_find_and_delete(binding_lports, b_lport->name); ++ binding_lport_destroy(b_lport); ++} ++ ++static void ++binding_lport_set_up(struct binding_lport *b_lport, bool sb_readonly) ++{ ++ if (sb_readonly || !b_lport || !b_lport->pb->n_up || b_lport->pb->up[0]) { ++ return; ++ } ++ ++ bool up = true; ++ sbrec_port_binding_set_up(b_lport->pb, &up, 1); ++} ++ ++static void ++binding_lport_set_down(struct binding_lport *b_lport, bool sb_readonly) ++{ ++ if (sb_readonly || !b_lport || !b_lport->pb->n_up || !b_lport->pb->up[0]) { ++ return; ++ } ++ ++ bool up = false; ++ sbrec_port_binding_set_up(b_lport->pb, &up, 1); ++} ++ ++static const struct sbrec_port_binding * ++binding_lport_get_parent_pb(struct binding_lport *b_lport) ++{ ++ if (!b_lport) { ++ return NULL; ++ } ++ ++ if (b_lport->type == LP_VIF) { ++ return NULL; + } ++ ++ struct local_binding *lbinding = b_lport->lbinding; ++ ovs_assert(lbinding); ++ ++ struct binding_lport *parent_b_lport = ++ local_binding_get_primary_lport(lbinding); ++ ++ return parent_b_lport ? parent_b_lport->pb : NULL; +} + -+/* Processes ofctrl seqno ACKs for new bindings. Sets the -+ * 'OVN_INSTALLED_EXT_ID' external-id in the OVS interface and the -+ * Port_Binding.up field for all ports for which OVS flows have been -+ * installed. ++/* This function checks and cleans up the 'b_lport' if it is ++ * not in the correct state. ++ * ++ * If the 'b_lport' type is LP_VIF, then its name and its lbinding->name ++ * should match. Otherwise this should be cleaned up. + * -+ * NOTE: Should be called only when valid SB and OVS transactions are -+ * available. ++ * If the 'b_lport' type is LP_CONTAINER, then its parent_port name should ++ * be the same as its lbinding's name. Otherwise this should be ++ * cleaned up. ++ * ++ * If the 'b_lport' type is LP_VIRTUAL, then its virtual parent name ++ * should be the same as its lbinding's name. Otherwise this ++ * should be cleaned up. 
++ * ++ * If the 'b_lport' type is not LP_VIF, LP_CONTAINER or LP_VIRTUAL, it ++ * should be cleaned up. This can happen if the CMS changes ++ * the port binding type. + */ -+void -+binding_seqno_install(struct shash *local_bindings) ++static struct binding_lport * ++binding_lport_check_and_cleanup(struct binding_lport *b_lport, ++ struct shash *binding_lports) +{ -+ struct ofctrl_acked_seqnos *acked_seqnos = -+ ofctrl_acked_seqnos_get(binding_seq_type_pb_cfg); -+ struct simap_node *node; -+ struct simap_node *node_next; ++ bool cleanup_blport = false; + -+ SIMAP_FOR_EACH_SAFE (node, node_next, &binding_iface_seqno_map) { -+ struct shash_node *lb_node = shash_find(local_bindings, node->name); -+ -+ if (!lb_node) { -+ goto del_seqno; -+ } ++ if (!b_lport->lbinding) { ++ cleanup_blport = true; ++ goto cleanup; ++ } + -+ struct local_binding *lb = lb_node->data; -+ if (!lb->pb || !lb->iface) { -+ goto del_seqno; ++ switch (b_lport->type) { ++ case LP_VIF: ++ if (strcmp(b_lport->name, b_lport->lbinding->name)) { ++ cleanup_blport = true; + } ++ break; + -+ if (!ofctrl_acked_seqnos_contains(acked_seqnos, node->data)) { -+ continue; ++ case LP_CONTAINER: ++ if (strcmp(b_lport->pb->parent_port, b_lport->lbinding->name)) { ++ cleanup_blport = true; + } ++ break; + -+ ovsrec_interface_update_external_ids_setkey(lb->iface, -+ OVN_INSTALLED_EXT_ID, -+ "true"); -+ if (lb->pb->n_up) { -+ bool up = true; -+ -+ sbrec_port_binding_set_up(lb->pb, &up, 1); -+ struct shash_node *child_node; -+ SHASH_FOR_EACH (child_node, &lb->children) { -+ struct local_binding *lb_child = child_node->data; -+ sbrec_port_binding_set_up(lb_child->pb, &up, 1); -+ } ++ case LP_VIRTUAL: ++ if (!b_lport->pb->virtual_parent || ++ strcmp(b_lport->pb->virtual_parent, b_lport->lbinding->name)) { ++ cleanup_blport = true; + } ++ break; + -+del_seqno: -+ simap_delete(&binding_iface_seqno_map, node); ++ case LP_PATCH: ++ case LP_LOCALPORT: ++ case LP_VTEP: ++ case LP_L2GATEWAY: ++ case LP_L3GATEWAY: ++ case 
LP_CHASSISREDIRECT: ++ case LP_EXTERNAL: ++ case LP_LOCALNET: ++ case LP_REMOTE: ++ case LP_UNKNOWN: ++ cleanup_blport = true; + } + -+ ofctrl_acked_seqnos_destroy(acked_seqnos); -+} ++cleanup: ++ if (cleanup_blport) { ++ binding_lport_delete(binding_lports, b_lport); ++ return NULL; ++ } + -+void -+binding_seqno_flush(void) -+{ -+ simap_clear(&binding_iface_seqno_map); ++ return b_lport; +} diff --git a/controller/binding.h b/controller/binding.h -index c9740560f..c9ebef4b1 100644 +index c9740560f..7a6495320 100644 --- a/controller/binding.h +++ b/controller/binding.h -@@ -100,6 +100,7 @@ struct local_binding { +@@ -36,6 +36,8 @@ struct sbrec_chassis; + struct sbrec_port_binding_table; + struct sset; + struct sbrec_port_binding; ++struct ds; ++struct if_status_mgr; + + struct binding_ctx_in { + struct ovsdb_idl_txn *ovnsb_idl_txn; +@@ -56,7 +58,7 @@ struct binding_ctx_in { + + struct binding_ctx_out { + struct hmap *local_datapaths; +- struct shash *local_bindings; ++ struct local_binding_data *lbinding_data; + + /* sset of (potential) local lports. */ + struct sset *local_lports; +@@ -84,29 +86,26 @@ struct binding_ctx_out { + * binding_handle_port_binding_changes) fills in for + * the changed datapaths and port bindings. */ + struct hmap *tracked_dp_bindings; +-}; + +-enum local_binding_type { +- BT_VIF, +- BT_CONTAINER, +- BT_VIRTUAL ++ struct if_status_mgr *if_mgr; + }; - /* shash of 'struct local_binding' representing children. */ - struct shash children; -+ struct local_binding *parent; +-struct local_binding { +- char *name; +- enum local_binding_type type; +- const struct ovsrec_interface *iface; +- const struct sbrec_port_binding *pb; +- +- /* shash of 'struct local_binding' representing children. 
*/ +- struct shash children; ++struct local_binding_data { ++ struct shash bindings; ++ struct shash lports; }; - static inline struct local_binding * -@@ -134,4 +135,9 @@ bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, +-static inline struct local_binding * +-local_binding_find(struct shash *local_bindings, const char *name) +-{ +- return shash_find_data(local_bindings, name); +-} ++void local_binding_data_init(struct local_binding_data *); ++void local_binding_data_destroy(struct local_binding_data *); ++ ++const struct sbrec_port_binding *local_binding_get_primary_pb( ++ struct shash *local_bindings, const char *pb_name); ++bool local_binding_is_up(struct shash *local_bindings, const char *pb_name); ++bool local_binding_is_down(struct shash *local_bindings, const char *pb_name); ++void local_binding_set_up(struct shash *local_bindings, const char *pb_name, ++ bool sb_readonly, bool ovs_readonly); ++void local_binding_set_down(struct shash *local_bindings, const char *pb_name, ++ bool sb_readonly, bool ovs_readonly); + + /* Represents a tracked binding logical port. 
*/ + struct tracked_binding_lport { +@@ -127,11 +126,11 @@ bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, + const struct sbrec_port_binding_table *, + const struct sbrec_chassis *); + +-void local_bindings_init(struct shash *local_bindings); +-void local_bindings_destroy(struct shash *local_bindings); + bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, + struct binding_ctx_out *); bool binding_handle_port_binding_changes(struct binding_ctx_in *, struct binding_ctx_out *); void binding_tracked_dp_destroy(struct hmap *tracked_datapaths); + -+void binding_init(void); -+void binding_seqno_run(struct shash *local_bindings); -+void binding_seqno_install(struct shash *local_bindings); -+void binding_seqno_flush(void); ++void binding_dump_local_bindings(struct local_binding_data *, struct ds *); #endif /* controller/binding.h */ diff --git a/controller/chassis.c b/controller/chassis.c -index b4d4b0e37..0937e33e6 100644 +index b4d4b0e37..310132d09 100644 --- a/controller/chassis.c +++ b/controller/chassis.c @@ -28,6 +28,7 @@ @@ -896,7 +2294,61 @@ index b4d4b0e37..0937e33e6 100644 VLOG_DEFINE_THIS_MODULE(chassis); -@@ -293,6 +294,7 @@ chassis_build_other_config(struct smap *config, const char *bridge_mappings, +@@ -49,6 +50,8 @@ struct ovs_chassis_cfg { + const char *monitor_all; + const char *chassis_macs; + const char *enable_lflow_cache; ++ const char *limit_lflow_cache; ++ const char *memlimit_lflow_cache; + + /* Set of encap types parsed from the 'ovn-encap-type' external-id. 
*/ + struct sset encap_type_set; +@@ -134,6 +137,18 @@ get_enable_lflow_cache(const struct smap *ext_ids) + return smap_get_def(ext_ids, "ovn-enable-lflow-cache", "true"); + } + ++static const char * ++get_limit_lflow_cache(const struct smap *ext_ids) ++{ ++ return smap_get_def(ext_ids, "ovn-limit-lflow-cache", ""); ++} ++ ++static const char * ++get_memlimit_lflow_cache(const struct smap *ext_ids) ++{ ++ return smap_get_def(ext_ids, "ovn-memlimit-lflow-cache-kb", ""); ++} ++ + static const char * + get_encap_csum(const struct smap *ext_ids) + { +@@ -256,6 +271,9 @@ chassis_parse_ovs_config(const struct ovsrec_open_vswitch_table *ovs_table, + ovs_cfg->monitor_all = get_monitor_all(&cfg->external_ids); + ovs_cfg->chassis_macs = get_chassis_mac_mappings(&cfg->external_ids); + ovs_cfg->enable_lflow_cache = get_enable_lflow_cache(&cfg->external_ids); ++ ovs_cfg->limit_lflow_cache = get_limit_lflow_cache(&cfg->external_ids); ++ ovs_cfg->memlimit_lflow_cache = ++ get_memlimit_lflow_cache(&cfg->external_ids); + + if (!chassis_parse_ovs_encap_type(encap_type, &ovs_cfg->encap_type_set)) { + return false; +@@ -283,16 +301,22 @@ chassis_build_other_config(struct smap *config, const char *bridge_mappings, + const char *datapath_type, const char *cms_options, + const char *monitor_all, const char *chassis_macs, + const char *iface_types, +- const char *enable_lflow_cache, bool is_interconn) ++ const char *enable_lflow_cache, ++ const char *limit_lflow_cache, ++ const char *memlimit_lflow_cache, ++ bool is_interconn) + { + smap_replace(config, "ovn-bridge-mappings", bridge_mappings); + smap_replace(config, "datapath-type", datapath_type); + smap_replace(config, "ovn-cms-options", cms_options); + smap_replace(config, "ovn-monitor-all", monitor_all); + smap_replace(config, "ovn-enable-lflow-cache", enable_lflow_cache); ++ smap_replace(config, "ovn-limit-lflow-cache", limit_lflow_cache); ++ smap_replace(config, "ovn-memlimit-lflow-cache-kb", memlimit_lflow_cache); 
smap_replace(config, "iface-types", iface_types); smap_replace(config, "ovn-chassis-mac-mappings", chassis_macs); smap_replace(config, "is-interconn", is_interconn ? "true" : "false"); @@ -904,24 +2356,1008 @@ index b4d4b0e37..0937e33e6 100644 } /* -@@ -363,6 +365,11 @@ chassis_other_config_changed(const char *bridge_mappings, +@@ -305,6 +329,8 @@ chassis_other_config_changed(const char *bridge_mappings, + const char *monitor_all, + const char *chassis_macs, + const char *enable_lflow_cache, ++ const char *limit_lflow_cache, ++ const char *memlimit_lflow_cache, + const struct ds *iface_types, + bool is_interconn, + const struct sbrec_chassis *chassis_rec) +@@ -344,6 +370,20 @@ chassis_other_config_changed(const char *bridge_mappings, return true; } -+ if (!smap_get_bool(&chassis_rec->other_config, OVN_FEATURE_PORT_UP_NOTIF, -+ false)) { ++ const char *chassis_limit_lflow_cache = ++ get_limit_lflow_cache(&chassis_rec->other_config); ++ ++ if (strcmp(limit_lflow_cache, chassis_limit_lflow_cache)) { + return true; + } + - return false; - } - -diff --git a/controller/lflow.c b/controller/lflow.c -index c02585b1e..76a4deaa0 100644 ---- a/controller/lflow.c -+++ b/controller/lflow.c -@@ -88,6 +88,11 @@ static void lflow_resource_destroy_lflow(struct lflow_resource_ref *, - static bool ++ const char *chassis_memlimit_lflow_cache = ++ get_memlimit_lflow_cache(&chassis_rec->other_config); ++ ++ if (strcmp(memlimit_lflow_cache, chassis_memlimit_lflow_cache)) { ++ return true; ++ } ++ + const char *chassis_mac_mappings = + get_chassis_mac_mappings(&chassis_rec->other_config); + if (strcmp(chassis_macs, chassis_mac_mappings)) { +@@ -363,6 +403,11 @@ chassis_other_config_changed(const char *bridge_mappings, + return true; + } + ++ if (!smap_get_bool(&chassis_rec->other_config, OVN_FEATURE_PORT_UP_NOTIF, ++ false)) { ++ return true; ++ } ++ + return false; + } + +@@ -523,6 +568,8 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + ovs_cfg->monitor_all, + 
ovs_cfg->chassis_macs, + ovs_cfg->enable_lflow_cache, ++ ovs_cfg->limit_lflow_cache, ++ ovs_cfg->memlimit_lflow_cache, + &ovs_cfg->iface_types, + ovs_cfg->is_interconn, + chassis_rec)) { +@@ -536,6 +583,8 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + ovs_cfg->chassis_macs, + ds_cstr_ro(&ovs_cfg->iface_types), + ovs_cfg->enable_lflow_cache, ++ ovs_cfg->limit_lflow_cache, ++ ovs_cfg->memlimit_lflow_cache, + ovs_cfg->is_interconn); + sbrec_chassis_verify_other_config(chassis_rec); + sbrec_chassis_set_other_config(chassis_rec, &other_config); +diff --git a/controller/if-status.c b/controller/if-status.c +new file mode 100644 +index 000000000..8d8c8d436 +--- /dev/null ++++ b/controller/if-status.c +@@ -0,0 +1,415 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#include ++ ++#include "binding.h" ++#include "if-status.h" ++#include "ofctrl-seqno.h" ++ ++#include "lib/hmapx.h" ++#include "lib/util.h" ++#include "openvswitch/vlog.h" ++ ++VLOG_DEFINE_THIS_MODULE(if_status); ++ ++/* This module implements an interface manager that maintains the state of ++ * the interfaces wrt. their flows being completely installed in OVS and ++ * their corresponding bindings being marked up/down. ++ * ++ * A state machine is maintained for each interface. ++ * ++ * Transitions are triggered between states by three types of events: ++ * A. 
Events received from the binding module: ++ * - interface is claimed: if_status_mgr_claim_iface() ++ * - interface is released: if_status_mgr_release_iface() ++ * - interface is deleted: if_status_mgr_delete_iface() ++ * ++ * B. At every iteration, based on SB/OVS updates, handled in ++ * if_status_mgr_update(): ++ * - an interface binding has been marked "up" both in the Southbound and OVS ++ * databases. ++ * - an interface binding has been marked "down" both in the Southbound and OVS ++ * databases. ++ * - new interface has been claimed. ++ * ++ * C. At every iteration, based on ofctrl_seqno updates, handled in ++ * if_status_mgr_run(): ++ * - the flows for a previously claimed interface have been installed in OVS. ++ */ ++ ++enum if_state { ++ OIF_CLAIMED, /* Newly claimed interface. */ ++ OIF_INSTALL_FLOWS, /* Already claimed interface for which flows are still ++ * being installed. ++ */ ++ OIF_MARK_UP, /* Interface with flows successfully installed in OVS ++ * but not yet marked "up" in the binding module (in ++ * SB and OVS databases). ++ */ ++ OIF_MARK_DOWN, /* Released interface but not yet marked "down" in the ++ * binding module (in SB and/or OVS databases). ++ */ ++ OIF_INSTALLED, /* Interface flows programmed in OVS and binding marked ++ * "up" in the binding module. ++ */ ++ OIF_MAX, ++}; ++ ++static const char *if_state_names[] = { ++ [OIF_CLAIMED] = "CLAIMED", ++ [OIF_INSTALL_FLOWS] = "INSTALL_FLOWS", ++ [OIF_MARK_UP] = "MARK_UP", ++ [OIF_MARK_DOWN] = "MARK_DOWN", ++ [OIF_INSTALLED] = "INSTALLED", ++}; ++ ++struct ovs_iface { ++ char *id; /* Extracted from OVS external_ids.iface_id. */ ++ enum if_state state; /* State of the interface in the state machine. */ ++ uint32_t install_seqno; /* Seqno at which this interface is expected to ++ * be fully programmed in OVS. Only used in state ++ * OIF_INSTALL_FLOWS. ++ */ ++}; ++ ++/* State machine manager for all local OVS interfaces. 
*/ ++struct if_status_mgr { ++ /* All local interfaces, mapping from 'iface-id' to 'struct ovs_iface'. */ ++ struct shash ifaces; ++ ++ /* All local interfaces, stored per state. */ ++ struct hmapx ifaces_per_state[OIF_MAX]; ++ ++ /* Registered ofctrl seqno type for port_binding flow installation. */ ++ size_t iface_seq_type_pb_cfg; ++ ++ /* Interface specific seqno to be acked by ofctrl when flows for new ++ * interfaces have been installed. ++ */ ++ uint32_t iface_seqno; ++}; ++ ++static struct ovs_iface *ovs_iface_create(struct if_status_mgr *, ++ const char *iface_id, ++ enum if_state ); ++static void ovs_iface_destroy(struct if_status_mgr *, struct ovs_iface *); ++static void ovs_iface_set_state(struct if_status_mgr *, struct ovs_iface *, ++ enum if_state); ++ ++static void if_status_mgr_update_bindings( ++ struct if_status_mgr *mgr, struct local_binding_data *binding_data, ++ bool sb_readonly, bool ovs_readonly); ++ ++struct if_status_mgr * ++if_status_mgr_create(void) ++{ ++ struct if_status_mgr *mgr = xzalloc(sizeof *mgr); ++ ++ mgr->iface_seq_type_pb_cfg = ofctrl_seqno_add_type(); ++ for (size_t i = 0; i < ARRAY_SIZE(mgr->ifaces_per_state); i++) { ++ hmapx_init(&mgr->ifaces_per_state[i]); ++ } ++ shash_init(&mgr->ifaces); ++ return mgr; ++} ++ ++void ++if_status_mgr_clear(struct if_status_mgr *mgr) ++{ ++ struct shash_node *node_next; ++ struct shash_node *node; ++ ++ SHASH_FOR_EACH_SAFE (node, node_next, &mgr->ifaces) { ++ ovs_iface_destroy(mgr, node->data); ++ } ++ ovs_assert(shash_is_empty(&mgr->ifaces)); ++ ++ for (size_t i = 0; i < ARRAY_SIZE(mgr->ifaces_per_state); i++) { ++ ovs_assert(hmapx_is_empty(&mgr->ifaces_per_state[i])); ++ } ++} ++ ++void ++if_status_mgr_destroy(struct if_status_mgr *mgr) ++{ ++ if_status_mgr_clear(mgr); ++ shash_destroy(&mgr->ifaces); ++ for (size_t i = 0; i < ARRAY_SIZE(mgr->ifaces_per_state); i++) { ++ hmapx_destroy(&mgr->ifaces_per_state[i]); ++ } ++ free(mgr); ++} ++ ++void ++if_status_mgr_claim_iface(struct 
if_status_mgr *mgr, const char *iface_id) ++{ ++ struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id); ++ ++ if (!iface) { ++ iface = ovs_iface_create(mgr, iface_id, OIF_CLAIMED); ++ } ++ ++ switch (iface->state) { ++ case OIF_CLAIMED: ++ case OIF_INSTALL_FLOWS: ++ case OIF_MARK_UP: ++ /* Nothing to do here. */ ++ break; ++ case OIF_INSTALLED: ++ case OIF_MARK_DOWN: ++ ovs_iface_set_state(mgr, iface, OIF_CLAIMED); ++ break; ++ case OIF_MAX: ++ OVS_NOT_REACHED(); ++ break; ++ } ++} ++ ++void ++if_status_mgr_release_iface(struct if_status_mgr *mgr, const char *iface_id) ++{ ++ struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id); ++ ++ if (!iface) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "Trying to release unknown interface %s", iface_id); ++ return; ++ } ++ ++ switch (iface->state) { ++ case OIF_CLAIMED: ++ case OIF_INSTALL_FLOWS: ++ /* Not yet fully installed interfaces can be safely deleted. */ ++ ovs_iface_destroy(mgr, iface); ++ break; ++ case OIF_MARK_UP: ++ case OIF_INSTALLED: ++ /* Properly mark interfaces "down" if their flows were already ++ * programmed in OVS. ++ */ ++ ovs_iface_set_state(mgr, iface, OIF_MARK_DOWN); ++ break; ++ case OIF_MARK_DOWN: ++ /* Nothing to do here. */ ++ break; ++ case OIF_MAX: ++ OVS_NOT_REACHED(); ++ break; ++ } ++} ++ ++void ++if_status_mgr_delete_iface(struct if_status_mgr *mgr, const char *iface_id) ++{ ++ struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id); ++ ++ if (!iface) { ++ return; ++ } ++ ++ switch (iface->state) { ++ case OIF_CLAIMED: ++ case OIF_INSTALL_FLOWS: ++ /* Not yet fully installed interfaces can be safely deleted. */ ++ ovs_iface_destroy(mgr, iface); ++ break; ++ case OIF_MARK_UP: ++ case OIF_INSTALLED: ++ /* Properly mark interfaces "down" if their flows were already ++ * programmed in OVS. ++ */ ++ ovs_iface_set_state(mgr, iface, OIF_MARK_DOWN); ++ break; ++ case OIF_MARK_DOWN: ++ /* Nothing to do here. 
*/ ++ break; ++ case OIF_MAX: ++ OVS_NOT_REACHED(); ++ break; ++ } ++} ++ ++void ++if_status_mgr_update(struct if_status_mgr *mgr, ++ struct local_binding_data *binding_data) ++{ ++ if (!binding_data) { ++ return; ++ } ++ ++ struct shash *bindings = &binding_data->bindings; ++ struct hmapx_node *node_next; ++ struct hmapx_node *node; ++ ++ /* Move all interfaces that have been confirmed "up" by the binding module, ++ * from OIF_MARK_UP to OIF_INSTALLED. ++ */ ++ HMAPX_FOR_EACH_SAFE (node, node_next, ++ &mgr->ifaces_per_state[OIF_MARK_UP]) { ++ struct ovs_iface *iface = node->data; ++ ++ if (local_binding_is_up(bindings, iface->id)) { ++ ovs_iface_set_state(mgr, iface, OIF_INSTALLED); ++ } ++ } ++ ++ /* Cleanup all interfaces that have been confirmed "down" by the binding ++ * module. ++ */ ++ HMAPX_FOR_EACH_SAFE (node, node_next, ++ &mgr->ifaces_per_state[OIF_MARK_DOWN]) { ++ struct ovs_iface *iface = node->data; ++ ++ if (local_binding_is_down(bindings, iface->id)) { ++ ovs_iface_destroy(mgr, iface); ++ } ++ } ++ ++ /* Register for a notification about flows being installed in OVS for all ++ * newly claimed interfaces. ++ * ++ * Move them from OIF_CLAIMED to OIF_INSTALL_FLOWS. ++ */ ++ bool new_ifaces = false; ++ HMAPX_FOR_EACH_SAFE (node, node_next, ++ &mgr->ifaces_per_state[OIF_CLAIMED]) { ++ struct ovs_iface *iface = node->data; ++ ++ ovs_iface_set_state(mgr, iface, OIF_INSTALL_FLOWS); ++ iface->install_seqno = mgr->iface_seqno + 1; ++ new_ifaces = true; ++ } ++ ++ /* Request a seqno update when the flows for new interfaces have been ++ * installed in OVS. 
++ */ ++ if (new_ifaces) { ++ mgr->iface_seqno++; ++ ofctrl_seqno_update_create(mgr->iface_seq_type_pb_cfg, ++ mgr->iface_seqno); ++ VLOG_DBG("Seqno requested: %"PRIu32, mgr->iface_seqno); ++ } ++} ++ ++void ++if_status_mgr_run(struct if_status_mgr *mgr, ++ struct local_binding_data *binding_data, ++ bool sb_readonly, bool ovs_readonly) ++{ ++ struct ofctrl_acked_seqnos *acked_seqnos = ++ ofctrl_acked_seqnos_get(mgr->iface_seq_type_pb_cfg); ++ struct hmapx_node *node_next; ++ struct hmapx_node *node; ++ ++ /* Move interfaces from state OIF_INSTALL_FLOWS to OIF_MARK_UP if a ++ * notification has been received about their flows being installed ++ * in OVS. ++ */ ++ HMAPX_FOR_EACH_SAFE (node, node_next, ++ &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) { ++ struct ovs_iface *iface = node->data; ++ ++ if (!ofctrl_acked_seqnos_contains(acked_seqnos, ++ iface->install_seqno)) { ++ continue; ++ } ++ ovs_iface_set_state(mgr, iface, OIF_MARK_UP); ++ } ++ ofctrl_acked_seqnos_destroy(acked_seqnos); ++ ++ /* Update binding states.
*/ ++ if_status_mgr_update_bindings(mgr, binding_data, sb_readonly, ++ ovs_readonly); ++} ++ ++static struct ovs_iface * ++ovs_iface_create(struct if_status_mgr *mgr, const char *iface_id, ++ enum if_state state) ++{ ++ struct ovs_iface *iface = xzalloc(sizeof *iface); ++ ++ VLOG_DBG("Interface %s create.", iface_id); ++ iface->id = xstrdup(iface_id); ++ shash_add(&mgr->ifaces, iface_id, iface); ++ ovs_iface_set_state(mgr, iface, state); ++ return iface; ++} ++ ++static void ++ovs_iface_destroy(struct if_status_mgr *mgr, struct ovs_iface *iface) ++{ ++ VLOG_DBG("Interface %s destroy: state %s", iface->id, ++ if_state_names[iface->state]); ++ hmapx_find_and_delete(&mgr->ifaces_per_state[iface->state], iface); ++ shash_find_and_delete(&mgr->ifaces, iface->id); ++ free(iface->id); ++ free(iface); ++} ++ ++static void ++ovs_iface_set_state(struct if_status_mgr *mgr, struct ovs_iface *iface, ++ enum if_state state) ++{ ++ VLOG_DBG("Interface %s set state: old %s, new %s", iface->id, ++ if_state_names[iface->state], ++ if_state_names[state]); ++ ++ hmapx_find_and_delete(&mgr->ifaces_per_state[iface->state], iface); ++ iface->state = state; ++ hmapx_add(&mgr->ifaces_per_state[iface->state], iface); ++ iface->install_seqno = 0; ++} ++ ++static void ++if_status_mgr_update_bindings(struct if_status_mgr *mgr, ++ struct local_binding_data *binding_data, ++ bool sb_readonly, bool ovs_readonly) ++{ ++ if (!binding_data) { ++ return; ++ } ++ ++ struct shash *bindings = &binding_data->bindings; ++ struct hmapx_node *node; ++ ++ /* Notify the binding module to set "down" all bindings that are still ++ * in the process of being installed in OVS, i.e., are not yet installed.
++ */ ++ HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) { ++ struct ovs_iface *iface = node->data; ++ ++ local_binding_set_down(bindings, iface->id, sb_readonly, ovs_readonly); ++ } ++ ++ /* Notify the binding module to set "up" all bindings that have had ++ * their flows installed but are not yet marked "up" in the binding ++ * module. ++ */ ++ HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) { ++ struct ovs_iface *iface = node->data; ++ ++ local_binding_set_up(bindings, iface->id, sb_readonly, ovs_readonly); ++ } ++ ++ /* Notify the binding module to set "down" all bindings that have been ++ * released but are not yet marked as "down" in the binding module. ++ */ ++ HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) { ++ struct ovs_iface *iface = node->data; ++ ++ local_binding_set_down(bindings, iface->id, sb_readonly, ovs_readonly); ++ } ++} +diff --git a/controller/if-status.h b/controller/if-status.h +new file mode 100644 +index 000000000..51fe7c684 +--- /dev/null ++++ b/controller/if-status.h +@@ -0,0 +1,37 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License.
++ */ ++ ++#ifndef IF_STATUS_H ++#define IF_STATUS_H 1 ++ ++#include "openvswitch/shash.h" ++ ++#include "binding.h" ++ ++struct if_status_mgr; ++ ++struct if_status_mgr *if_status_mgr_create(void); ++void if_status_mgr_clear(struct if_status_mgr *); ++void if_status_mgr_destroy(struct if_status_mgr *); ++ ++void if_status_mgr_claim_iface(struct if_status_mgr *, const char *iface_id); ++void if_status_mgr_release_iface(struct if_status_mgr *, const char *iface_id); ++void if_status_mgr_delete_iface(struct if_status_mgr *, const char *iface_id); ++ ++void if_status_mgr_update(struct if_status_mgr *, struct local_binding_data *); ++void if_status_mgr_run(struct if_status_mgr *mgr, struct local_binding_data *, ++ bool sb_readonly, bool ovs_readonly); ++ ++# endif /* controller/if-status.h */ +diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c +new file mode 100644 +index 000000000..56ddf1075 +--- /dev/null ++++ b/controller/lflow-cache.c +@@ -0,0 +1,371 @@ ++/* ++ * Copyright (c) 2015, 2016 Nicira, Inc. ++ * Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++#include <config.h> ++ ++#if HAVE_DECL_MALLOC_TRIM ++#include <malloc.h> ++#endif ++ ++#include "coverage.h" ++#include "lflow-cache.h" ++#include "lib/uuid.h" ++#include "ovn/expr.h" ++ ++COVERAGE_DEFINE(lflow_cache_flush); ++COVERAGE_DEFINE(lflow_cache_add_conj_id); ++COVERAGE_DEFINE(lflow_cache_add_expr); ++COVERAGE_DEFINE(lflow_cache_add_matches); ++COVERAGE_DEFINE(lflow_cache_free_conj_id); ++COVERAGE_DEFINE(lflow_cache_free_expr); ++COVERAGE_DEFINE(lflow_cache_free_matches); ++COVERAGE_DEFINE(lflow_cache_add); ++COVERAGE_DEFINE(lflow_cache_hit); ++COVERAGE_DEFINE(lflow_cache_miss); ++COVERAGE_DEFINE(lflow_cache_delete); ++COVERAGE_DEFINE(lflow_cache_full); ++COVERAGE_DEFINE(lflow_cache_mem_full); ++COVERAGE_DEFINE(lflow_cache_made_room); ++ ++static const char *lflow_cache_type_names[LCACHE_T_MAX] = { ++ [LCACHE_T_CONJ_ID] = "cache-conj-id", ++ [LCACHE_T_EXPR] = "cache-expr", ++ [LCACHE_T_MATCHES] = "cache-matches", ++}; ++ ++struct lflow_cache { ++ struct hmap entries[LCACHE_T_MAX]; ++ uint32_t capacity; ++ uint64_t mem_usage; ++ uint64_t max_mem_usage; ++ bool enabled; ++}; ++ ++struct lflow_cache_entry { ++ struct hmap_node node; ++ struct uuid lflow_uuid; /* key */ ++ size_t size; ++ ++ struct lflow_cache_value value; ++}; ++ ++static size_t lflow_cache_n_entries__(const struct lflow_cache *lc); ++static bool lflow_cache_make_room__(struct lflow_cache *lc, ++ enum lflow_cache_type type); ++static struct lflow_cache_value *lflow_cache_add__( ++ struct lflow_cache *lc, const struct uuid *lflow_uuid, ++ enum lflow_cache_type type, uint64_t value_size); ++static void lflow_cache_delete__(struct lflow_cache *lc, ++ struct lflow_cache_entry *lce); ++ ++struct lflow_cache * ++lflow_cache_create(void) ++{ ++ struct lflow_cache *lc = xzalloc(sizeof *lc); ++ ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ hmap_init(&lc->entries[i]); ++ } ++ ++ lc->enabled = true; ++ lc->mem_usage = 0; ++ return lc; ++} ++ ++void ++lflow_cache_flush(struct lflow_cache *lc) ++{ ++ if (!lc) {
++ return; ++ } ++ ++ COVERAGE_INC(lflow_cache_flush); ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ struct lflow_cache_entry *lce; ++ struct lflow_cache_entry *lce_next; ++ ++ HMAP_FOR_EACH_SAFE (lce, lce_next, node, &lc->entries[i]) { ++ lflow_cache_delete__(lc, lce); ++ } ++ hmap_shrink(&lc->entries[i]); ++ } ++ ++#if HAVE_DECL_MALLOC_TRIM ++ malloc_trim(0); ++#endif ++} ++ ++void ++lflow_cache_destroy(struct lflow_cache *lc) ++{ ++ if (!lc) { ++ return; ++ } ++ ++ lflow_cache_flush(lc); ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ hmap_destroy(&lc->entries[i]); ++ } ++ free(lc); ++} ++ ++void ++lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t capacity, ++ uint64_t max_mem_usage_kb) ++{ ++ if (!lc) { ++ return; ++ } ++ ++ uint64_t max_mem_usage = max_mem_usage_kb * 1024; ++ ++ if ((lc->enabled && !enabled) ++ || capacity < lflow_cache_n_entries__(lc) ++ || max_mem_usage < lc->mem_usage) { ++ lflow_cache_flush(lc); ++ } ++ ++ lc->enabled = enabled; ++ lc->capacity = capacity; ++ lc->max_mem_usage = max_mem_usage; ++} ++ ++bool ++lflow_cache_is_enabled(const struct lflow_cache *lc) ++{ ++ return lc && lc->enabled; ++} ++ ++void ++lflow_cache_get_stats(const struct lflow_cache *lc, struct ds *output) ++{ ++ if (!output) { ++ return; ++ } ++ ++ if (!lc) { ++ ds_put_cstr(output, "Invalid arguments."); ++ return; ++ } ++ ++ ds_put_format(output, "Enabled: %s\n", ++ lflow_cache_is_enabled(lc) ? 
"true" : "false"); ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ ds_put_format(output, "%-16s: %"PRIuSIZE"\n", ++ lflow_cache_type_names[i], ++ hmap_count(&lc->entries[i])); ++ } ++ ds_put_format(output, "%-16s: %"PRIu64"\n", "Mem usage (KB)", ++ ROUND_UP(lc->mem_usage, 1024) / 1024); ++} ++ ++void ++lflow_cache_add_conj_id(struct lflow_cache *lc, const struct uuid *lflow_uuid, ++ uint32_t conj_id_ofs) ++{ ++ struct lflow_cache_value *lcv = ++ lflow_cache_add__(lc, lflow_uuid, LCACHE_T_CONJ_ID, 0); ++ ++ if (!lcv) { ++ return; ++ } ++ COVERAGE_INC(lflow_cache_add_conj_id); ++ lcv->conj_id_ofs = conj_id_ofs; ++} ++ ++void ++lflow_cache_add_expr(struct lflow_cache *lc, const struct uuid *lflow_uuid, ++ uint32_t conj_id_ofs, struct expr *expr, size_t expr_sz) ++{ ++ struct lflow_cache_value *lcv = ++ lflow_cache_add__(lc, lflow_uuid, LCACHE_T_EXPR, expr_sz); ++ ++ if (!lcv) { ++ expr_destroy(expr); ++ return; ++ } ++ COVERAGE_INC(lflow_cache_add_expr); ++ lcv->conj_id_ofs = conj_id_ofs; ++ lcv->expr = expr; ++} ++ ++void ++lflow_cache_add_matches(struct lflow_cache *lc, const struct uuid *lflow_uuid, ++ struct hmap *matches, size_t matches_sz) ++{ ++ struct lflow_cache_value *lcv = ++ lflow_cache_add__(lc, lflow_uuid, LCACHE_T_MATCHES, matches_sz); ++ ++ if (!lcv) { ++ expr_matches_destroy(matches); ++ free(matches); ++ return; ++ } ++ COVERAGE_INC(lflow_cache_add_matches); ++ lcv->expr_matches = matches; ++} ++ ++struct lflow_cache_value * ++lflow_cache_get(struct lflow_cache *lc, const struct uuid *lflow_uuid) ++{ ++ if (!lflow_cache_is_enabled(lc)) { ++ return NULL; ++ } ++ ++ size_t hash = uuid_hash(lflow_uuid); ++ ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ struct lflow_cache_entry *lce; ++ ++ HMAP_FOR_EACH_WITH_HASH (lce, node, hash, &lc->entries[i]) { ++ if (uuid_equals(&lce->lflow_uuid, lflow_uuid)) { ++ COVERAGE_INC(lflow_cache_hit); ++ return &lce->value; ++ } ++ } ++ } ++ COVERAGE_INC(lflow_cache_miss); ++ return NULL; ++} ++ ++void 
++lflow_cache_delete(struct lflow_cache *lc, const struct uuid *lflow_uuid) ++{ ++ if (!lc) { ++ return; ++ } ++ ++ struct lflow_cache_value *lcv = lflow_cache_get(lc, lflow_uuid); ++ if (lcv) { ++ COVERAGE_INC(lflow_cache_delete); ++ lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct lflow_cache_entry, ++ value)); ++ } ++} ++ ++static size_t ++lflow_cache_n_entries__(const struct lflow_cache *lc) ++{ ++ size_t n_entries = 0; ++ ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ n_entries += hmap_count(&lc->entries[i]); ++ } ++ return n_entries; ++} ++ ++static bool ++lflow_cache_make_room__(struct lflow_cache *lc, enum lflow_cache_type type) ++{ ++ /* When the cache becomes full, the rule is to prefer more "important" ++ * cache entries over less "important" ones. That is, evict entries of ++ * type LCACHE_T_CONJ_ID if there's no room to add an entry of type ++ * LCACHE_T_EXPR. Similarly, evict entries of type LCACHE_T_CONJ_ID or ++ * LCACHE_T_EXPR if there's no room to add an entry of type ++ * LCACHE_T_MATCHES. 
++ */ ++ for (size_t i = 0; i < type; i++) { ++ if (hmap_count(&lc->entries[i]) > 0) { ++ struct lflow_cache_entry *lce = ++ CONTAINER_OF(hmap_first(&lc->entries[i]), ++ struct lflow_cache_entry, node); ++ ++ lflow_cache_delete__(lc, lce); ++ return true; ++ } ++ } ++ return false; ++} ++ ++void ++lflow_cache_get_memory_usage(const struct lflow_cache *lc, struct simap *usage) ++{ ++ for (size_t i = 0; i < LCACHE_T_MAX; i++) { ++ char *counter_name = xasprintf("lflow-cache-entries-%s", ++ lflow_cache_type_names[i]); ++ simap_increase(usage, counter_name, hmap_count(&lc->entries[i])); ++ free(counter_name); ++ } ++ simap_increase(usage, "lflow-cache-size-KB", ++ ROUND_UP(lc->mem_usage, 1024) / 1024); ++} ++ ++static struct lflow_cache_value * ++lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid, ++ enum lflow_cache_type type, uint64_t value_size) ++{ ++ if (!lflow_cache_is_enabled(lc) || !lflow_uuid) { ++ return NULL; ++ } ++ ++ struct lflow_cache_entry *lce; ++ size_t size = sizeof *lce + value_size; ++ if (size + lc->mem_usage > lc->max_mem_usage) { ++ COVERAGE_INC(lflow_cache_mem_full); ++ return NULL; ++ } ++ ++ if (lflow_cache_n_entries__(lc) == lc->capacity) { ++ if (!lflow_cache_make_room__(lc, type)) { ++ COVERAGE_INC(lflow_cache_full); ++ return NULL; ++ } else { ++ COVERAGE_INC(lflow_cache_made_room); ++ } ++ } ++ ++ lc->mem_usage += size; ++ ++ COVERAGE_INC(lflow_cache_add); ++ lce = xzalloc(sizeof *lce); ++ lce->lflow_uuid = *lflow_uuid; ++ lce->size = size; ++ lce->value.type = type; ++ hmap_insert(&lc->entries[type], &lce->node, uuid_hash(lflow_uuid)); ++ return &lce->value; ++} ++ ++static void ++lflow_cache_delete__(struct lflow_cache *lc, struct lflow_cache_entry *lce) ++{ ++ if (!lce) { ++ return; ++ } ++ ++ hmap_remove(&lc->entries[lce->value.type], &lce->node); ++ switch (lce->value.type) { ++ case LCACHE_T_NONE: ++ OVS_NOT_REACHED(); ++ break; ++ case LCACHE_T_CONJ_ID: ++ COVERAGE_INC(lflow_cache_free_conj_id); ++ break; ++ 
case LCACHE_T_EXPR: ++ COVERAGE_INC(lflow_cache_free_expr); ++ expr_destroy(lce->value.expr); ++ break; ++ case LCACHE_T_MATCHES: ++ COVERAGE_INC(lflow_cache_free_matches); ++ expr_matches_destroy(lce->value.expr_matches); ++ free(lce->value.expr_matches); ++ break; ++ } ++ ++ ovs_assert(lc->mem_usage >= lce->size); ++ lc->mem_usage -= lce->size; ++ free(lce); ++} +diff --git a/controller/lflow-cache.h b/controller/lflow-cache.h +new file mode 100644 +index 000000000..3c1fb4142 +--- /dev/null ++++ b/controller/lflow-cache.h +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2015, 2016 Nicira, Inc. ++ * Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#ifndef LFLOW_CACHE_H ++#define LFLOW_CACHE_H 1 ++ ++#include "openvswitch/dynamic-string.h" ++#include "openvswitch/hmap.h" ++#include "openvswitch/uuid.h" ++#include "simap.h" ++ ++struct lflow_cache; ++ ++/* Various lflow cache types which ++ * - store the conjunction id offset if the lflow matches ++ * results in conjunctive OpenvSwitch flows. ++ * ++ * - Caches ++ * (1) Conjunction ID offset if the logical flow has port group/address ++ * set references. ++ * (2) expr tree if the logical flow has is_chassis_resident() match. ++ * (3) expr matches if (1) and (2) are false. ++ */ ++enum lflow_cache_type { ++ LCACHE_T_CONJ_ID, /* Only conjunction id offset is cached. */ ++ LCACHE_T_EXPR, /* Expr tree of the logical flow is cached. 
*/ ++ LCACHE_T_MATCHES, /* Expression matches are cached. */ ++ LCACHE_T_MAX, ++ LCACHE_T_NONE = LCACHE_T_MAX, /* Not found in cache. */ ++}; ++ ++struct lflow_cache_value { ++ enum lflow_cache_type type; ++ uint32_t conj_id_ofs; ++ ++ union { ++ struct hmap *expr_matches; ++ struct expr *expr; ++ }; ++}; ++ ++struct lflow_cache *lflow_cache_create(void); ++void lflow_cache_flush(struct lflow_cache *); ++void lflow_cache_destroy(struct lflow_cache *); ++void lflow_cache_enable(struct lflow_cache *, bool enabled, uint32_t capacity, ++ uint64_t max_mem_usage_kb); ++bool lflow_cache_is_enabled(const struct lflow_cache *); ++void lflow_cache_get_stats(const struct lflow_cache *, struct ds *output); ++ ++void lflow_cache_add_conj_id(struct lflow_cache *, ++ const struct uuid *lflow_uuid, ++ uint32_t conj_id_ofs); ++void lflow_cache_add_expr(struct lflow_cache *, const struct uuid *lflow_uuid, ++ uint32_t conj_id_ofs, struct expr *expr, ++ size_t expr_sz); ++void lflow_cache_add_matches(struct lflow_cache *, ++ const struct uuid *lflow_uuid, ++ struct hmap *matches, size_t matches_sz); ++ ++struct lflow_cache_value *lflow_cache_get(struct lflow_cache *, ++ const struct uuid *lflow_uuid); ++void lflow_cache_delete(struct lflow_cache *, const struct uuid *lflow_uuid); ++ ++void lflow_cache_get_memory_usage(const struct lflow_cache *, ++ struct simap *usage); ++ ++#endif /* controller/lflow-cache.h */ +diff --git a/controller/lflow.c b/controller/lflow.c +index c02585b1e..a3d84aff4 100644 +--- a/controller/lflow.c ++++ b/controller/lflow.c +@@ -17,6 +17,7 @@ + #include "lflow.h" + #include "coverage.h" + #include "ha-chassis.h" ++#include "lflow-cache.h" + #include "lport.h" + #include "ofctrl.h" + #include "openvswitch/dynamic-string.h" +@@ -88,6 +89,11 @@ static void lflow_resource_destroy_lflow(struct lflow_resource_ref *, + static bool lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp) { + if (!strcmp(port_name, "none")) { @@ -932,7 +3368,111 
@@ index c02585b1e..76a4deaa0 100644 const struct lookup_port_aux *aux = aux_; const struct sbrec_port_binding *pb -@@ -480,22 +485,19 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in, +@@ -306,103 +312,6 @@ lflow_resource_destroy_lflow(struct lflow_resource_ref *lfrr, + free(lfrn); + } + +-enum lflow_cache_type { +- LCACHE_T_NO_CACHE, +- LCACHE_T_MATCHES, +- LCACHE_T_EXPR, +-}; +- +-/* Represents an lflow cache which +- * - stores the conjunction id offset if the lflow matches +- * results in conjunctive OpenvSwitch flows. +- * +- * - Caches +- * (1) Nothing if the logical flow has port group/address set references. +- * (2) expr tree if the logical flow has is_chassis_resident() match. +- * (3) expr matches if (1) and (2) are false. +- */ +-struct lflow_cache { +- struct hmap_node node; +- struct uuid lflow_uuid; /* key */ +- uint32_t conj_id_ofs; +- +- enum lflow_cache_type type; +- union { +- struct { +- struct hmap *expr_matches; +- size_t n_conjs; +- }; +- struct expr *expr; +- }; +-}; +- +-static struct lflow_cache * +-lflow_cache_add(struct hmap *lflow_cache_map, +- const struct sbrec_logical_flow *lflow) +-{ +- struct lflow_cache *lc = xmalloc(sizeof *lc); +- lc->lflow_uuid = lflow->header_.uuid; +- lc->conj_id_ofs = 0; +- lc->type = LCACHE_T_NO_CACHE; +- hmap_insert(lflow_cache_map, &lc->node, uuid_hash(&lc->lflow_uuid)); +- return lc; +-} +- +-static struct lflow_cache * +-lflow_cache_get(struct hmap *lflow_cache_map, +- const struct sbrec_logical_flow *lflow) +-{ +- struct lflow_cache *lc; +- size_t hash = uuid_hash(&lflow->header_.uuid); +- HMAP_FOR_EACH_WITH_HASH (lc, node, hash, lflow_cache_map) { +- if (uuid_equals(&lc->lflow_uuid, &lflow->header_.uuid)) { +- return lc; +- } +- } +- +- return NULL; +-} +- +-static void +-lflow_cache_delete(struct hmap *lflow_cache_map, +- const struct sbrec_logical_flow *lflow) +-{ +- struct lflow_cache *lc = lflow_cache_get(lflow_cache_map, lflow); +- if (lc) { +- hmap_remove(lflow_cache_map, &lc->node); 
+- if (lc->type == LCACHE_T_MATCHES) { +- expr_matches_destroy(lc->expr_matches); +- free(lc->expr_matches); +- } else if (lc->type == LCACHE_T_EXPR) { +- expr_destroy(lc->expr); +- } +- free(lc); +- } +-} +- +-void +-lflow_cache_init(struct hmap *lflow_cache_map) +-{ +- hmap_init(lflow_cache_map); +-} +- +-void +-lflow_cache_destroy(struct hmap *lflow_cache_map) +-{ +- struct lflow_cache *lc; +- HMAP_FOR_EACH_POP (lc, node, lflow_cache_map) { +- if (lc->type == LCACHE_T_MATCHES) { +- expr_matches_destroy(lc->expr_matches); +- free(lc->expr_matches); +- } else if (lc->type == LCACHE_T_EXPR) { +- expr_destroy(lc->expr); +- } +- free(lc); +- } +- +- hmap_destroy(lflow_cache_map); +-} +- + /* Adds the logical flows from the Logical_Flow table to flow tables. */ + static void + add_logical_flows(struct lflow_ctx_in *l_ctx_in, +@@ -480,24 +389,22 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in, struct controller_event_options controller_event_opts; controller_event_opts_init(&controller_event_opts); @@ -960,10 +3500,15 @@ index c02585b1e..76a4deaa0 100644 - ofrn->sb_uuid = lflow->header_.uuid; - hmap_insert(&flood_remove_nodes, &ofrn->hmap_node, - uuid_hash(&ofrn->sb_uuid)); - if (l_ctx_out->lflow_cache_map) { - lflow_cache_delete(l_ctx_out->lflow_cache_map, lflow); +- if (l_ctx_out->lflow_cache_map) { +- lflow_cache_delete(l_ctx_out->lflow_cache_map, lflow); ++ if (lflow_cache_is_enabled(l_ctx_out->lflow_cache)) { ++ lflow_cache_delete(l_ctx_out->lflow_cache, ++ &lflow->header_.uuid); } -@@ -525,21 +527,6 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in, + } + } +@@ -525,21 +432,6 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in, } hmap_destroy(&flood_remove_nodes); @@ -985,7 +3530,18 @@ index c02585b1e..76a4deaa0 100644 dhcp_opts_destroy(&dhcp_opts); dhcp_opts_destroy(&dhcpv6_opts); nd_ra_opts_destroy(&nd_ra_opts); -@@ -668,9 +655,8 @@ update_conj_id_ofs(uint32_t *conj_id_ofs, uint32_t n_conjs) +@@ -659,18 +551,17 @@ 
update_conj_id_ofs(uint32_t *conj_id_ofs, uint32_t n_conjs) + { + if (*conj_id_ofs + n_conjs < *conj_id_ofs) { + /* overflow */ +- return false; ++ return true; + } + *conj_id_ofs += n_conjs; +- return true; ++ return false; + } + static void add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, const struct sbrec_datapath_binding *dp, @@ -997,7 +3553,7 @@ index c02585b1e..76a4deaa0 100644 bool ingress, struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out) { -@@ -702,15 +688,14 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, +@@ -702,15 +593,14 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, .lb_hairpin_ptable = OFTABLE_CHK_LB_HAIRPIN, .lb_hairpin_reply_ptable = OFTABLE_CHK_LB_HAIRPIN_REPLY, .ct_snat_vip_ptable = OFTABLE_CT_SNAT_FOR_VIP, @@ -1015,7 +3571,7 @@ index c02585b1e..76a4deaa0 100644 if (datapath_is_switch(dp)) { unsigned int reg_index = (ingress ? MFF_LOG_INPORT : MFF_LOG_OUTPORT) - MFF_REG0; -@@ -744,7 +729,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, +@@ -744,7 +634,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, struct ofpact_conjunction *dst; dst = ofpact_put_CONJUNCTION(&conj); @@ -1024,50 +3580,301 @@ index c02585b1e..76a4deaa0 100644 dst->clause = src->clause; dst->n_clauses = src->n_clauses; } -@@ -915,9 +900,9 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, - return true; +@@ -762,11 +652,12 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, + /* Converts the match and returns the simplified expr tree. + * + * The caller should evaluate the conditions and normalize the expr tree. ++ * If parsing is successful, '*prereqs' is also consumed. 
+ */ + static struct expr * + convert_match_to_expr(const struct sbrec_logical_flow *lflow, + const struct sbrec_datapath_binding *dp, +- struct expr *prereqs, ++ struct expr **prereqs, + const struct shash *addr_sets, + const struct shash *port_groups, + struct lflow_resource_ref *lfrr, +@@ -799,8 +690,9 @@ convert_match_to_expr(const struct sbrec_logical_flow *lflow, + sset_destroy(&port_groups_ref); + + if (!error) { +- if (prereqs) { +- e = expr_combine(EXPR_T_AND, e, prereqs); ++ if (*prereqs) { ++ e = expr_combine(EXPR_T_AND, e, *prereqs); ++ *prereqs = NULL; } + e = expr_annotate(e, &symtab, &error); + } +@@ -858,7 +750,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, + .n_tables = LOG_PIPELINE_LEN, + .cur_ltable = lflow->table_id, + }; +- struct expr *prereqs; ++ struct expr *prereqs = NULL; + char *error; + + error = ovnacts_parse_string(lflow->actions, &pp, &ovnacts, &prereqs); +@@ -886,150 +778,129 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, + .lfrr = l_ctx_out->lfrr, + }; +- struct expr *expr = NULL; +- if (!l_ctx_out->lflow_cache_map) { +- /* Caching is disabled. 
*/ +- expr = convert_match_to_expr(lflow, dp, prereqs, l_ctx_in->addr_sets, +- l_ctx_in->port_groups, l_ctx_out->lfrr, +- NULL); +- if (!expr) { +- expr_destroy(prereqs); +- ovnacts_free(ovnacts.data, ovnacts.size); +- ofpbuf_uninit(&ovnacts); +- return true; +- } +- +- expr = expr_evaluate_condition(expr, is_chassis_resident_cb, &cond_aux, +- NULL); +- expr = expr_normalize(expr); +- struct hmap matches = HMAP_INITIALIZER(&matches); +- uint32_t n_conjs = expr_to_matches(expr, lookup_port_cb, &aux, +- &matches); +- expr_destroy(expr); +- if (hmap_is_empty(&matches)) { +- VLOG_DBG("lflow "UUID_FMT" matches are empty, skip", +- UUID_ARGS(&lflow->header_.uuid)); +- ovnacts_free(ovnacts.data, ovnacts.size); +- ofpbuf_uninit(&ovnacts); +- expr_matches_destroy(&matches); +- return true; +- } +- - add_matches_to_flow_table(lflow, dp, &matches, *l_ctx_out->conj_id_ofs, - ptable, output_ptable, &ovnacts, ingress, - l_ctx_in, l_ctx_out); -+ expr_matches_prepare(&matches, *l_ctx_out->conj_id_ofs); -+ add_matches_to_flow_table(lflow, dp, &matches, ptable, output_ptable, -+ &ovnacts, ingress, l_ctx_in, l_ctx_out); - - ovnacts_free(ovnacts.data, ovnacts.size); - ofpbuf_uninit(&ovnacts); -@@ -930,10 +915,11 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, - lflow_cache_get(l_ctx_out->lflow_cache_map, lflow); - - if (lc && lc->type == LCACHE_T_MATCHES) { +- +- ovnacts_free(ovnacts.data, ovnacts.size); +- ofpbuf_uninit(&ovnacts); +- expr_matches_destroy(&matches); +- return update_conj_id_ofs(l_ctx_out->conj_id_ofs, n_conjs); +- } ++ struct lflow_cache_value *lcv = ++ lflow_cache_get(l_ctx_out->lflow_cache, &lflow->header_.uuid); ++ uint32_t conj_id_ofs = ++ lcv ? lcv->conj_id_ofs : *l_ctx_out->conj_id_ofs; ++ enum lflow_cache_type lcv_type = ++ lcv ? lcv->type : LCACHE_T_NONE; + +- /* Caching is enabled. */ +- struct lflow_cache *lc = +- lflow_cache_get(l_ctx_out->lflow_cache_map, lflow); +- +- if (lc && lc->type == LCACHE_T_MATCHES) { - /* 'matches' is cached. 
No need to do expr parsing. -+ /* 'matches' is cached. No need to do expr parsing and no need -+ * to call expr_matches_prepare() to update the conj ids. - * Add matches to flow table and return. */ +- * Add matches to flow table and return. */ - add_matches_to_flow_table(lflow, dp, lc->expr_matches, lc->conj_id_ofs, - ptable, output_ptable, &ovnacts, ingress, -+ add_matches_to_flow_table(lflow, dp, lc->expr_matches, ptable, -+ output_ptable, &ovnacts, ingress, - l_ctx_in, l_ctx_out); - ovnacts_free(ovnacts.data, ovnacts.size); - ofpbuf_uninit(&ovnacts); -@@ -1009,10 +995,11 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, +- l_ctx_in, l_ctx_out); +- ovnacts_free(ovnacts.data, ovnacts.size); +- ofpbuf_uninit(&ovnacts); +- expr_destroy(prereqs); +- return true; +- } ++ struct expr *cached_expr = NULL, *expr = NULL; ++ struct hmap *matches = NULL; ++ size_t matches_size = 0; + +- if (!lc) { +- /* Create the lflow_cache for the lflow. */ +- lc = lflow_cache_add(l_ctx_out->lflow_cache_map, lflow); +- } ++ bool is_cr_cond_present = false; ++ bool pg_addr_set_ref = false; ++ uint32_t n_conjs = 0; + +- if (lc && lc->type == LCACHE_T_EXPR) { +- expr = lc->expr; +- } ++ bool conj_id_overflow = false; + +- bool pg_addr_set_ref = false; +- if (!expr) { +- expr = convert_match_to_expr(lflow, dp, prereqs, l_ctx_in->addr_sets, ++ /* Get match expr, either from cache or from lflow match. */ ++ switch (lcv_type) { ++ case LCACHE_T_NONE: ++ case LCACHE_T_CONJ_ID: ++ expr = convert_match_to_expr(lflow, dp, &prereqs, l_ctx_in->addr_sets, + l_ctx_in->port_groups, l_ctx_out->lfrr, + &pg_addr_set_ref); + if (!expr) { +- expr_destroy(prereqs); +- ovnacts_free(ovnacts.data, ovnacts.size); +- ofpbuf_uninit(&ovnacts); +- return true; ++ goto done; } +- } else { +- expr_destroy(prereqs); +- } +- +- ovs_assert(lc && expr); +- +- /* Cache the 'expr' only if the lflow doesn't reference a port group and +- * address set. 
*/ +- if (!pg_addr_set_ref) { +- /* Note: If the expr doesn't have 'is_chassis_resident, then the +- * type will be set to LCACHE_T_MATCHES and 'matches' will be +- * cached instead. See below. */ +- lc->type = LCACHE_T_EXPR; +- lc->expr = expr; ++ break; ++ case LCACHE_T_EXPR: ++ expr = expr_clone(lcv->expr); ++ break; ++ case LCACHE_T_MATCHES: ++ break; + } + +- if (lc->type == LCACHE_T_EXPR) { +- expr = expr_clone(lc->expr); +- } +- +- bool is_cr_cond_present = false; +- expr = expr_evaluate_condition(expr, is_chassis_resident_cb, &cond_aux, +- &is_cr_cond_present); +- expr = expr_normalize(expr); +- struct hmap *matches = xmalloc(sizeof *matches); +- uint32_t n_conjs = expr_to_matches(expr, lookup_port_cb, &aux, +- matches); +- expr_destroy(expr); +- if (hmap_is_empty(matches)) { +- VLOG_DBG("lflow "UUID_FMT" matches are empty, skip", +- UUID_ARGS(&lflow->header_.uuid)); +- ovnacts_free(ovnacts.data, ovnacts.size); +- ofpbuf_uninit(&ovnacts); +- expr_matches_destroy(matches); +- free(matches); +- return true; ++ /* If caching is enabled and this is a not cached expr that doesn't refer ++ * to address sets or port groups, save it to potentially cache it later. ++ */ ++ if (lcv_type == LCACHE_T_NONE ++ && lflow_cache_is_enabled(l_ctx_out->lflow_cache) ++ && !pg_addr_set_ref) { ++ cached_expr = expr_clone(expr); } -+ expr_matches_prepare(matches, lc->conj_id_ofs); +- if (n_conjs && !lc->conj_id_ofs) { +- lc->conj_id_ofs = *l_ctx_out->conj_id_ofs; +- if (!update_conj_id_ofs(l_ctx_out->conj_id_ofs, n_conjs)) { +- lc->conj_id_ofs = 0; +- expr_matches_destroy(matches); +- free(matches); +- return false; ++ /* Normalize expression if needed. */ ++ switch (lcv_type) { ++ case LCACHE_T_NONE: ++ case LCACHE_T_CONJ_ID: ++ case LCACHE_T_EXPR: ++ expr = expr_evaluate_condition(expr, is_chassis_resident_cb, &cond_aux, ++ &is_cr_cond_present); ++ expr = expr_normalize(expr); ++ break; ++ case LCACHE_T_MATCHES: ++ break; ++ } + - /* Encode OVN logical actions into OpenFlow. 
*/ ++ /* Get matches, either from cache or from expr computed above. */ ++ switch (lcv_type) { ++ case LCACHE_T_NONE: ++ case LCACHE_T_CONJ_ID: ++ case LCACHE_T_EXPR: ++ matches = xmalloc(sizeof *matches); ++ n_conjs = expr_to_matches(expr, lookup_port_cb, &aux, matches); ++ matches_size = expr_matches_prepare(matches, conj_id_ofs); ++ if (hmap_is_empty(matches)) { ++ VLOG_DBG("lflow "UUID_FMT" matches are empty, skip", ++ UUID_ARGS(&lflow->header_.uuid)); ++ goto done; + } +- } +- +- /* Encode OVN logical actions into OpenFlow. */ - add_matches_to_flow_table(lflow, dp, matches, lc->conj_id_ofs, - ptable, output_ptable, &ovnacts, ingress, - l_ctx_in, l_ctx_out); ++ break; ++ case LCACHE_T_MATCHES: ++ matches = lcv->expr_matches; ++ break; ++ } ++ + add_matches_to_flow_table(lflow, dp, matches, ptable, output_ptable, + &ovnacts, ingress, l_ctx_in, l_ctx_out); ++ ++ /* Update cache if needed. */ ++ switch (lcv_type) { ++ case LCACHE_T_NONE: ++ /* Entry not already in cache, update conjunction id offset and ++ * add the entry to the cache. ++ */ ++ conj_id_overflow = update_conj_id_ofs(l_ctx_out->conj_id_ofs, n_conjs); ++ ++ /* Cache new entry if caching is enabled. */ ++ if (lflow_cache_is_enabled(l_ctx_out->lflow_cache)) { ++ if (cached_expr && !is_cr_cond_present) { ++ lflow_cache_add_matches(l_ctx_out->lflow_cache, ++ &lflow->header_.uuid, matches, ++ matches_size); ++ matches = NULL; ++ } else if (cached_expr) { ++ lflow_cache_add_expr(l_ctx_out->lflow_cache, ++ &lflow->header_.uuid, conj_id_ofs, ++ cached_expr, expr_size(cached_expr)); ++ cached_expr = NULL; ++ } else if (n_conjs) { ++ lflow_cache_add_conj_id(l_ctx_out->lflow_cache, ++ &lflow->header_.uuid, conj_id_ofs); ++ } ++ } ++ break; ++ case LCACHE_T_CONJ_ID: ++ case LCACHE_T_EXPR: ++ break; ++ case LCACHE_T_MATCHES: ++ /* Cached matches were used, don't destroy them. 
*/ ++ matches = NULL; ++ break; ++ } ++ ++done: ++ expr_destroy(prereqs); ovnacts_free(ovnacts.data, ovnacts.size); ofpbuf_uninit(&ovnacts); +- +- if (!is_cr_cond_present && lc->type == LCACHE_T_EXPR) { +- /* If 'is_chassis_resident' match is not present, then cache +- * 'matches'. */ +- expr_destroy(lc->expr); +- lc->type = LCACHE_T_MATCHES; +- lc->expr_matches = matches; +- } +- +- if (lc->type != LCACHE_T_MATCHES) { +- expr_matches_destroy(matches); +- free(matches); +- } +- +- return true; ++ expr_destroy(expr); ++ expr_destroy(cached_expr); ++ expr_matches_destroy(matches); ++ free(matches); ++ return !conj_id_overflow; + } -@@ -1080,6 +1067,18 @@ put_load(const uint8_t *data, size_t len, + static bool +@@ -1080,6 +951,18 @@ put_load(const uint8_t *data, size_t len, bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits); } @@ -1086,7 +3893,7 @@ index c02585b1e..76a4deaa0 100644 static void consider_neighbor_flow(struct ovsdb_idl_index *sbrec_port_binding_by_name, const struct hmap *local_datapaths, -@@ -1173,6 +1172,184 @@ add_neighbor_flows(struct ovsdb_idl_index *sbrec_port_binding_by_name, +@@ -1173,6 +1056,184 @@ add_neighbor_flows(struct ovsdb_idl_index *sbrec_port_binding_by_name, } } @@ -1271,7 +4078,7 @@ index c02585b1e..76a4deaa0 100644 static void add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, struct ovn_lb_vip *lb_vip, -@@ -1182,43 +1359,81 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, +@@ -1182,43 +1243,81 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, { uint64_t stub[1024 / 8]; struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(stub); @@ -1334,11 +4141,7 @@ index c02585b1e..76a4deaa0 100644 - match_set_ipv6_dst(&hairpin_match, &lb_backend->ip); + match_set_ipv6_src(&hairpin_match, bip6); + match_set_ipv6_dst(&hairpin_match, bip6); - -- match_set_dl_type(&hairpin_reply_match, -- htons(ETH_TYPE_IPV6)); -- match_set_ipv6_src(&hairpin_reply_match, &lb_backend->ip); -- 
match_set_ipv6_dst(&hairpin_reply_match, &lb_vip->vip); ++ + if (!lb->hairpin_orig_tuple) { + match_set_ct_ipv6_dst(&hairpin_match, &lb_vip->vip); + } else { @@ -1349,7 +4152,11 @@ index c02585b1e..76a4deaa0 100644 + MFF_LOG_LB_ORIG_DIP_IPV6 - MFF_LOG_XXREG0, + ntoh128(vip6_value)); + } -+ + +- match_set_dl_type(&hairpin_reply_match, +- htons(ETH_TYPE_IPV6)); +- match_set_ipv6_src(&hairpin_reply_match, &lb_backend->ip); +- match_set_ipv6_dst(&hairpin_reply_match, &lb_vip->vip); + add_lb_vip_hairpin_reply_action(snat_vip6, 0, lb_proto, + lb_backend->port, + lb->slb->header_.uuid.parts[0], @@ -1373,7 +4180,7 @@ index c02585b1e..76a4deaa0 100644 } /* In the original direction, only match on traffic that was already -@@ -1239,23 +1454,19 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, +@@ -1239,23 +1338,19 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, ofctrl_add_flow(flow_table, OFTABLE_CHK_LB_HAIRPIN, 100, lb->slb->header_.uuid.parts[0], &hairpin_match, &ofpacts, &lb->slb->header_.uuid); @@ -1404,7 +4211,7 @@ index c02585b1e..76a4deaa0 100644 struct ovn_desired_flow_table *flow_table) { uint64_t stub[1024 / 8]; -@@ -1279,25 +1490,65 @@ add_lb_ct_snat_vip_flows(struct ovn_controller_lb *lb, +@@ -1279,25 +1374,65 @@ add_lb_ct_snat_vip_flows(struct ovn_controller_lb *lb, if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { nat->range_af = AF_INET; @@ -1476,7 +4283,7 @@ index c02585b1e..76a4deaa0 100644 for (size_t i = 0; i < lb->slb->n_datapaths; i++) { match_set_metadata(&match, -@@ -1351,7 +1602,7 @@ consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb, +@@ -1351,7 +1486,7 @@ consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb, flow_table); } @@ -1485,7 +4292,7 @@ index c02585b1e..76a4deaa0 100644 } ovn_controller_lb_destroy(lb); -@@ -1404,6 +1655,61 @@ lflow_handle_changed_neighbors( +@@ -1404,6 +1539,61 @@ lflow_handle_changed_neighbors( } } @@ -1547,16 +4354,52 @@ index c02585b1e..76a4deaa0 100644 /* Translates logical 
flows in the Logical_Flow table in the OVN_SB database * into OpenFlow flows. See ovn-architecture(7) for more information. */ -@@ -1431,6 +1737,8 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out) +@@ -1412,25 +1602,31 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out) + { + COVERAGE_INC(lflow_run); + +- /* when lflow_run is called, it's possible that some of the logical flows +- * are deleted. We need to delete the lflow cache for these +- * lflows (if present), otherwise, they will not be deleted at all. */ +- if (l_ctx_out->lflow_cache_map) { +- const struct sbrec_logical_flow *lflow; +- SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow, +- l_ctx_in->logical_flow_table) { +- if (sbrec_logical_flow_is_deleted(lflow)) { +- lflow_cache_delete(l_ctx_out->lflow_cache_map, lflow); +- } +- } +- } +- + add_logical_flows(l_ctx_in, l_ctx_out); + add_neighbor_flows(l_ctx_in->sbrec_port_binding_by_name, + l_ctx_in->mac_binding_table, l_ctx_in->local_datapaths, l_ctx_out->flow_table); add_lb_hairpin_flows(l_ctx_in->lb_table, l_ctx_in->local_datapaths, l_ctx_out->flow_table); + add_fdb_flows(l_ctx_in->fdb_table, l_ctx_in->local_datapaths, + l_ctx_out->flow_table); ++} ++ ++/* Should be called at every ovn-controller iteration before IDL tracked ++ * changes are cleared to avoid maintaining cache entries for flows that ++ * don't exist anymore. 
++ */ ++void ++lflow_handle_cached_flows(struct lflow_cache *lc, ++ const struct sbrec_logical_flow_table *flow_table) ++{ ++ const struct sbrec_logical_flow *lflow; ++ ++ SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow, flow_table) { ++ if (sbrec_logical_flow_is_deleted(lflow)) { ++ lflow_cache_delete(lc, &lflow->header_.uuid); ++ } ++ } } void -@@ -1582,3 +1890,37 @@ lflow_handle_changed_lbs(struct lflow_ctx_in *l_ctx_in, +@@ -1582,3 +1778,37 @@ lflow_handle_changed_lbs(struct lflow_ctx_in *l_ctx_in, return true; } @@ -1595,10 +4438,18 @@ index c02585b1e..76a4deaa0 100644 + return true; +} diff --git a/controller/lflow.h b/controller/lflow.h -index ba79cc374..2eb2cb112 100644 +index ba79cc374..3c929d8a6 100644 --- a/controller/lflow.h +++ b/controller/lflow.h -@@ -60,9 +60,9 @@ struct uuid; +@@ -34,6 +34,7 @@ + */ + + #include ++#include "lflow-cache.h" + #include "openvswitch/hmap.h" + #include "openvswitch/uuid.h" + #include "openvswitch/list.h" +@@ -60,9 +61,9 @@ struct uuid; * you make any changes. */ #define OFTABLE_PHY_TO_LOG 0 #define OFTABLE_LOG_INGRESS_PIPELINE 8 /* First of LOG_PIPELINE_LEN tables. */ @@ -1611,7 +4462,7 @@ index ba79cc374..2eb2cb112 100644 #define OFTABLE_LOG_EGRESS_PIPELINE 40 /* First of LOG_PIPELINE_LEN tables. 
*/ #define OFTABLE_SAVE_INPORT 64 #define OFTABLE_LOG_TO_PHY 65 -@@ -71,9 +71,8 @@ struct uuid; +@@ -71,9 +72,8 @@ struct uuid; #define OFTABLE_CHK_LB_HAIRPIN 68 #define OFTABLE_CHK_LB_HAIRPIN_REPLY 69 #define OFTABLE_CT_SNAT_FOR_VIP 70 @@ -1623,7 +4474,7 @@ index ba79cc374..2eb2cb112 100644 enum ref_type { REF_TYPE_ADDRSET, -@@ -136,6 +135,7 @@ struct lflow_ctx_in { +@@ -136,6 +136,7 @@ struct lflow_ctx_in { const struct sbrec_logical_flow_table *logical_flow_table; const struct sbrec_logical_dp_group_table *logical_dp_group_table; const struct sbrec_multicast_group_table *mc_group_table; @@ -1631,14 +4482,37 @@ index ba79cc374..2eb2cb112 100644 const struct sbrec_chassis *chassis; const struct sbrec_load_balancer_table *lb_table; const struct hmap *local_datapaths; -@@ -167,6 +167,7 @@ void lflow_handle_changed_neighbors( +@@ -150,13 +151,15 @@ struct lflow_ctx_out { + struct ovn_extend_table *group_table; + struct ovn_extend_table *meter_table; + struct lflow_resource_ref *lfrr; +- struct hmap *lflow_cache_map; ++ struct lflow_cache *lflow_cache; + uint32_t *conj_id_ofs; + bool conj_id_overflow; + }; + + void lflow_init(void); +-void lflow_run(struct lflow_ctx_in *, struct lflow_ctx_out *); ++void lflow_run(struct lflow_ctx_in *, struct lflow_ctx_out *); ++void lflow_handle_cached_flows(struct lflow_cache *, ++ const struct sbrec_logical_flow_table *); + bool lflow_handle_changed_flows(struct lflow_ctx_in *, struct lflow_ctx_out *); + bool lflow_handle_changed_ref(enum ref_type, const char *ref_name, + struct lflow_ctx_in *, struct lflow_ctx_out *, +@@ -167,11 +170,9 @@ void lflow_handle_changed_neighbors( const struct hmap *local_datapaths, struct ovn_desired_flow_table *); bool lflow_handle_changed_lbs(struct lflow_ctx_in *, struct lflow_ctx_out *); +bool lflow_handle_changed_fdbs(struct lflow_ctx_in *, struct lflow_ctx_out *); void lflow_destroy(void); - void lflow_cache_init(struct hmap *); +-void lflow_cache_init(struct hmap *); +-void 
lflow_cache_destroy(struct hmap *); +- + bool lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *, + struct lflow_ctx_in *, + struct lflow_ctx_out *); diff --git a/controller/mac-learn.c b/controller/mac-learn.c new file mode 100644 index 000000000..27634dca8 @@ -2417,19 +5291,137 @@ index 64b0ea5dd..88769566a 100644 void ofctrl_ct_flush_zone(uint16_t zone_id); +diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml +index 29833c7c7..c407f8984 100644 +--- a/controller/ovn-controller.8.xml ++++ b/controller/ovn-controller.8.xml +@@ -249,6 +249,29 @@ + processing the southbound and local Open vSwitch database changes. + The default value is considered false if this option is not defined. + ++ ++
external_ids:ovn-enable-lflow-cache
++
++ The boolean flag indicates if ovn-controller should ++ enable/disable the logical flow in-memory cache it uses when ++ processing Southbound database logical flow changes. By default ++ caching is enabled. ++
++ ++
external_ids:ovn-limit-lflow-cache
++
++ When used, this configuration value determines the maximum number of ++ logical flow cache entries ovn-controller may create ++ when the logical flow cache is enabled. By default the size of the ++ cache is unlimited. ++
++
external_ids:ovn-memlimit-lflow-cache-kb
++
++ When used, this configuration value determines the maximum size of ++ the logical flow cache (in KB) ovn-controller may create ++ when the logical flow cache is enabled. By default the size of the ++ cache is unlimited. ++
+ + +

+@@ -544,6 +567,39 @@ + end-to-end latency in a large scale environment. See + ovn-nbctl(8) for more details. + ++ ++

inc-engine/show-stats
++
++ Display ovn-controller engine counters. For each engine ++ node the following counters have been added: ++
    ++
  • ++ recompute ++
  • ++
  • ++ compute ++
  • ++
  • ++ abort ++
  • ++
++
++ ++
inc-engine/clear-stats
++
++ Reset ovn-controller engine counters. ++
++ ++
lflow-cache/flush
++
++ Flushes the ovn-controller logical flow cache. ++
++ ++
lflow-cache/show-stats
++
++ Displays logical flow cache statistics: enabled/disabled, per cache ++ type entry counts. ++
+ +

+ diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c -index 366fc9c06..288e2e12d 100644 +index 366fc9c06..b4eee4848 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c -@@ -39,6 +39,7 @@ +@@ -33,12 +33,16 @@ + #include "openvswitch/dynamic-string.h" + #include "encaps.h" + #include "fatal-signal.h" ++#include "if-status.h" + #include "ip-mcast.h" + #include "openvswitch/hmap.h" + #include "lflow.h" ++#include "lflow-cache.h" #include "lib/vswitch-idl.h" #include "lport.h" ++#include "memory.h" #include "ofctrl.h" +#include "ofctrl-seqno.h" #include "openvswitch/vconn.h" #include "openvswitch/vlog.h" #include "ovn/actions.h" -@@ -98,6 +99,9 @@ struct pending_pkt { +@@ -54,6 +58,7 @@ + #include "openvswitch/poll-loop.h" + #include "lib/bitmap.h" + #include "lib/hash.h" ++#include "simap.h" + #include "smap.h" + #include "sset.h" + #include "stream-ssl.h" +@@ -77,7 +82,9 @@ static unixctl_cb_func cluster_state_reset_cmd; + static unixctl_cb_func debug_pause_execution; + static unixctl_cb_func debug_resume_execution; + static unixctl_cb_func debug_status_execution; +-static unixctl_cb_func flush_lflow_cache; ++static unixctl_cb_func debug_dump_local_bindings; ++static unixctl_cb_func lflow_cache_flush_cmd; ++static unixctl_cb_func lflow_cache_show_stats_cmd; + static unixctl_cb_func debug_delay_nb_cfg_report; + + #define DEFAULT_BRIDGE_NAME "br-int" +@@ -91,6 +98,15 @@ static unixctl_cb_func debug_delay_nb_cfg_report; + static char *parse_options(int argc, char *argv[]); + OVS_NO_RETURN static void usage(void); + ++/* By default don't set an upper bound for the lflow cache. */ ++#define DEFAULT_LFLOW_CACHE_MAX_ENTRIES UINT32_MAX ++#define DEFAULT_LFLOW_CACHE_MAX_MEM_KB (UINT64_MAX / 1024) ++ ++struct controller_engine_ctx { ++ struct lflow_cache *lflow_cache; ++ struct if_status_mgr *if_mgr; ++}; ++ + /* Pending packet to be injected into connected OVS. 
*/ + struct pending_pkt { + /* Setting 'conn' indicates that a request is pending. */ +@@ -98,6 +114,9 @@ struct pending_pkt { char *flow_s; }; @@ -2439,7 +5431,82 @@ index 366fc9c06..288e2e12d 100644 struct local_datapath * get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key) { -@@ -583,7 +587,18 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones, +@@ -242,23 +261,15 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + uuid); + } + +- /* Updating conditions to receive logical flows that references +- * datapath groups containing local datapaths. */ +- const struct sbrec_logical_dp_group *group; +- SBREC_LOGICAL_DP_GROUP_FOR_EACH (group, ovnsb_idl) { +- struct uuid *uuid = CONST_CAST(struct uuid *, +- &group->header_.uuid); +- size_t i; +- +- for (i = 0; i < group->n_datapaths; i++) { +- if (get_local_datapath(local_datapaths, +- group->datapaths[i]->tunnel_key)) { +- sbrec_logical_flow_add_clause_logical_dp_group( +- &lf, OVSDB_F_EQ, uuid); +- break; +- } +- } +- } ++ /* Datapath groups are immutable, which means a new group record is ++ * created when a datapath is added to a group. The logical flows ++ * referencing a datapath group are also updated in such cases but the ++ * new group UUID is not known by ovn-controller until the SB update ++ * is received. To avoid unnecessarily removing and adding lflows ++ * that reference datapath groups, set the monitor condition to always ++ * request all of them. 
++ */ ++ sbrec_logical_flow_add_clause_logical_dp_group(&lf, OVSDB_F_NE, NULL); + } + + out:; +@@ -404,6 +415,10 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn, + if (datapath_type && strcmp(br_int->datapath_type, datapath_type)) { + ovsrec_bridge_set_datapath_type(br_int, datapath_type); + } ++ if (!br_int->fail_mode || strcmp(br_int->fail_mode, "secure")) { ++ ovsrec_bridge_set_fail_mode(br_int, "secure"); ++ VLOG_WARN("Integration bridge fail-mode changed to 'secure'."); ++ } + } + return br_int; + } +@@ -524,7 +539,8 @@ get_ofctrl_probe_interval(struct ovsdb_idl *ovs_idl) + static void + update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, + bool *monitor_all_p, bool *reset_ovnsb_idl_min_index, +- bool *enable_lflow_cache, unsigned int *sb_cond_seqno) ++ struct controller_engine_ctx *ctx, ++ unsigned int *sb_cond_seqno) + { + const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl); + if (!cfg) { +@@ -565,9 +581,17 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, + *reset_ovnsb_idl_min_index = false; + } + +- if (enable_lflow_cache != NULL) { +- *enable_lflow_cache = +- smap_get_bool(&cfg->external_ids, "ovn-enable-lflow-cache", true); ++ if (ctx) { ++ lflow_cache_enable(ctx->lflow_cache, ++ smap_get_bool(&cfg->external_ids, ++ "ovn-enable-lflow-cache", ++ true), ++ smap_get_uint(&cfg->external_ids, ++ "ovn-limit-lflow-cache", ++ DEFAULT_LFLOW_CACHE_MAX_ENTRIES), ++ smap_get_ullong(&cfg->external_ids, ++ "ovn-memlimit-lflow-cache-kb", ++ DEFAULT_LFLOW_CACHE_MAX_MEM_KB)); + } + } + +@@ -583,7 +607,18 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones, pending->state = state; /* Skip flushing zone. 
*/ pending->zone = zone; pending->add = add; @@ -2459,7 +5526,7 @@ index 366fc9c06..288e2e12d 100644 } static void -@@ -798,11 +813,11 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, +@@ -798,11 +833,11 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, } } @@ -2473,7 +5540,7 @@ index 366fc9c06..288e2e12d 100644 /* Delay getting nb_cfg if there are monitor condition changes * in flight. It might be that those changes would instruct the -@@ -825,11 +840,14 @@ static void +@@ -825,11 +860,14 @@ static void store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn, const struct sbrec_chassis_private *chassis, const struct ovsrec_bridge *br_int, @@ -2491,7 +5558,7 @@ index 366fc9c06..288e2e12d 100644 } if (sb_txn && chassis && cur_cfg != chassis->nb_cfg) { -@@ -850,6 +868,9 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn, +@@ -850,6 +888,9 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn, cur_cfg_str); free(cur_cfg_str); } @@ -2501,7 +5568,7 @@ index 366fc9c06..288e2e12d 100644 } static const char * -@@ -911,7 +932,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl) +@@ -911,7 +952,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl) SB_NODE(dhcp_options, "dhcp_options") \ SB_NODE(dhcpv6_options, "dhcpv6_options") \ SB_NODE(dns, "dns") \ @@ -2511,7 +5578,22 @@ index 366fc9c06..288e2e12d 100644 enum sb_engine_node { #define SB_NODE(NAME, NAME_STR) SB_##NAME, -@@ -967,6 +989,12 @@ en_ofctrl_is_connected_run(struct engine_node *node, void *data) +@@ -940,10 +982,6 @@ enum ovs_engine_node { + OVS_NODES + #undef OVS_NODE + +-struct controller_engine_ctx { +- bool enable_lflow_cache; +-}; +- + struct ed_type_ofctrl_is_connected { + bool connected; + }; +@@ -964,9 +1002,16 @@ en_ofctrl_is_connected_cleanup(void *data OVS_UNUSED) + static void + en_ofctrl_is_connected_run(struct engine_node *node, void *data) + { ++ struct controller_engine_ctx *ctrl_ctx = 
engine_get_context()->client_ctx; struct ed_type_ofctrl_is_connected *of_data = data; if (of_data->connected != ofctrl_is_connected()) { of_data->connected = !of_data->connected; @@ -2519,12 +5601,128 @@ index 366fc9c06..288e2e12d 100644 + /* Flush ofctrl seqno requests when the ofctrl connection goes down. */ + if (!of_data->connected) { + ofctrl_seqno_flush(); -+ binding_seqno_flush(); ++ if_status_mgr_clear(ctrl_ctx->if_mgr); + } engine_set_node_state(node, EN_UPDATED); return; } -@@ -1836,6 +1864,10 @@ static void init_lflow_ctx(struct engine_node *node, +@@ -1137,8 +1182,7 @@ struct ed_type_runtime_data { + /* Contains "struct local_datapath" nodes. */ + struct hmap local_datapaths; + +- /* Contains "struct local_binding" nodes. */ +- struct shash local_bindings; ++ struct local_binding_data lbinding_data; + + /* Contains the name of each logical port resident on the local + * hypervisor. These logical ports include the VIFs (and their child +@@ -1177,9 +1221,9 @@ struct ed_type_runtime_data { + * | | Interface and Port Binding changes store the | + * | @tracked_dp_bindings | changed datapaths (datapaths added/removed from | + * | | local_datapaths) and changed port bindings | +- * | | (added/updated/deleted in 'local_bindings'). | ++ * | | (added/updated/deleted in 'lbinding_data'). | + * | | So any changes to the runtime data - | +- * | | local_datapaths and local_bindings is captured | ++ * | | local_datapaths and lbinding_data is captured | + * | | here. 
| + * ------------------------------------------------------------------------ + * | | This is a bool which represents if the runtime | +@@ -1206,7 +1250,7 @@ struct ed_type_runtime_data { + * + * --------------------------------------------------------------------- + * | local_datapaths | The changes to these runtime data is captured in | +- * | local_bindings | the @tracked_dp_bindings indirectly and hence it | ++ * | lbinding_data | the @tracked_dp_bindings indirectly and hence it | + * | local_lport_ids | is not tracked explicitly. | + * --------------------------------------------------------------------- + * | local_iface_ids | This is used internally within the runtime data | +@@ -1249,7 +1293,7 @@ en_runtime_data_init(struct engine_node *node OVS_UNUSED, + sset_init(&data->active_tunnels); + sset_init(&data->egress_ifaces); + smap_init(&data->local_iface_ids); +- local_bindings_init(&data->local_bindings); ++ local_binding_data_init(&data->lbinding_data); + + /* Init the tracked data. 
*/ + hmap_init(&data->tracked_dp_bindings); +@@ -1277,7 +1321,7 @@ en_runtime_data_cleanup(void *data) + free(cur_node); + } + hmap_destroy(&rt_data->local_datapaths); +- local_bindings_destroy(&rt_data->local_bindings); ++ local_binding_data_destroy(&rt_data->lbinding_data); + hmapx_destroy(&rt_data->ct_updated_datapaths); + } + +@@ -1338,6 +1382,8 @@ init_binding_ctx(struct engine_node *node, + engine_get_input("SB_port_binding", node), + "datapath"); + ++ struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx; ++ + b_ctx_in->ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn; + b_ctx_in->ovs_idl_txn = engine_get_context()->ovs_idl_txn; + b_ctx_in->sbrec_datapath_binding_by_key = sbrec_datapath_binding_by_key; +@@ -1360,10 +1406,10 @@ init_binding_ctx(struct engine_node *node, + b_ctx_out->local_lport_ids_changed = false; + b_ctx_out->non_vif_ports_changed = false; + b_ctx_out->egress_ifaces = &rt_data->egress_ifaces; +- b_ctx_out->local_bindings = &rt_data->local_bindings; ++ b_ctx_out->lbinding_data = &rt_data->lbinding_data; + b_ctx_out->local_iface_ids = &rt_data->local_iface_ids; + b_ctx_out->tracked_dp_bindings = NULL; +- b_ctx_out->local_lports_changed = NULL; ++ b_ctx_out->if_mgr = ctrl_ctx->if_mgr; + } + + static void +@@ -1404,7 +1450,7 @@ en_runtime_data_run(struct engine_node *node, void *data) + free(cur_node); + } + hmap_clear(local_datapaths); +- local_bindings_destroy(&rt_data->local_bindings); ++ local_binding_data_destroy(&rt_data->lbinding_data); + sset_destroy(local_lports); + sset_destroy(local_lport_ids); + sset_destroy(active_tunnels); +@@ -1415,7 +1461,7 @@ en_runtime_data_run(struct engine_node *node, void *data) + sset_init(active_tunnels); + sset_init(&rt_data->egress_ifaces); + smap_init(&rt_data->local_iface_ids); +- local_bindings_init(&rt_data->local_bindings); ++ local_binding_data_init(&rt_data->lbinding_data); + hmapx_clear(&rt_data->ct_updated_datapaths); + } + +@@ -1670,6 +1716,7 @@ 
en_physical_flow_changes_run(struct engine_node *node, void *data) + { + struct ed_type_pfc_data *pfc_tdata = data; + pfc_tdata->recompute_physical_flows = true; ++ pfc_tdata->ovs_ifaces_changed = true; + engine_set_node_state(node, EN_UPDATED); + } + +@@ -1696,8 +1743,7 @@ physical_flow_changes_ovs_iface_handler(struct engine_node *node, void *data) + + struct flow_output_persistent_data { + uint32_t conj_id_ofs; +- struct hmap lflow_cache_map; +- bool lflow_cache_enabled; ++ struct lflow_cache *lflow_cache; + }; + + struct ed_type_flow_output { +@@ -1778,7 +1824,7 @@ static void init_physical_ctx(struct engine_node *node, + p_ctx->local_lports = &rt_data->local_lports; + p_ctx->ct_zones = ct_zones; + p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve; +- p_ctx->local_bindings = &rt_data->local_bindings; ++ p_ctx->local_bindings = &rt_data->lbinding_data.bindings; + p_ctx->ct_updated_datapaths = &rt_data->ct_updated_datapaths; + } + +@@ -1836,6 +1882,10 @@ static void init_lflow_ctx(struct engine_node *node, (struct sbrec_load_balancer_table *)EN_OVSDB_GET( engine_get_input("SB_load_balancer", node)); @@ -2535,7 +5733,7 @@ index 366fc9c06..288e2e12d 100644 struct ovsrec_open_vswitch_table *ovs_table = (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET( engine_get_input("OVS_open_vswitch", node)); -@@ -1873,6 +1905,7 @@ static void init_lflow_ctx(struct engine_node *node, +@@ -1873,6 +1923,7 @@ static void init_lflow_ctx(struct engine_node *node, l_ctx_in->logical_flow_table = logical_flow_table; l_ctx_in->logical_dp_group_table = logical_dp_group_table; l_ctx_in->mc_group_table = multicast_group_table; @@ -2543,7 +5741,65 @@ index 366fc9c06..288e2e12d 100644 l_ctx_in->chassis = chassis; l_ctx_in->lb_table = lb_table; l_ctx_in->local_datapaths = &rt_data->local_datapaths; -@@ -2313,6 +2346,23 @@ flow_output_sb_load_balancer_handler(struct engine_node *node, void *data) +@@ -1886,11 +1937,7 @@ static void init_lflow_ctx(struct engine_node *node, + 
l_ctx_out->meter_table = &fo->meter_table; + l_ctx_out->lfrr = &fo->lflow_resource_ref; + l_ctx_out->conj_id_ofs = &fo->pd.conj_id_ofs; +- if (fo->pd.lflow_cache_enabled) { +- l_ctx_out->lflow_cache_map = &fo->pd.lflow_cache_map; +- } else { +- l_ctx_out->lflow_cache_map = NULL; +- } ++ l_ctx_out->lflow_cache = fo->pd.lflow_cache; + l_ctx_out->conj_id_overflow = false; + } + +@@ -1905,8 +1952,6 @@ en_flow_output_init(struct engine_node *node OVS_UNUSED, + ovn_extend_table_init(&data->meter_table); + data->pd.conj_id_ofs = 1; + lflow_resource_init(&data->lflow_resource_ref); +- lflow_cache_init(&data->pd.lflow_cache_map); +- data->pd.lflow_cache_enabled = true; + return data; + } + +@@ -1918,7 +1963,7 @@ en_flow_output_cleanup(void *data) + ovn_extend_table_destroy(&flow_output_data->group_table); + ovn_extend_table_destroy(&flow_output_data->meter_table); + lflow_resource_destroy(&flow_output_data->lflow_resource_ref); +- lflow_cache_destroy(&flow_output_data->pd.lflow_cache_map); ++ lflow_cache_destroy(flow_output_data->pd.lflow_cache); + } + + static void +@@ -1965,13 +2010,10 @@ en_flow_output_run(struct engine_node *node, void *data) + } + + struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx; +- if (fo->pd.lflow_cache_enabled && !ctrl_ctx->enable_lflow_cache) { +- lflow_cache_destroy(&fo->pd.lflow_cache_map); +- lflow_cache_init(&fo->pd.lflow_cache_map); +- } +- fo->pd.lflow_cache_enabled = ctrl_ctx->enable_lflow_cache; + +- if (!fo->pd.lflow_cache_enabled) { ++ fo->pd.lflow_cache = ctrl_ctx->lflow_cache; ++ ++ if (!lflow_cache_is_enabled(fo->pd.lflow_cache)) { + fo->pd.conj_id_ofs = 1; + } + +@@ -1988,8 +2030,7 @@ en_flow_output_run(struct engine_node *node, void *data) + ovn_extend_table_clear(meter_table, false /* desired */); + lflow_resource_clear(lfrr); + fo->pd.conj_id_ofs = 1; +- lflow_cache_destroy(&fo->pd.lflow_cache_map); +- lflow_cache_init(&fo->pd.lflow_cache_map); ++ lflow_cache_flush(fo->pd.lflow_cache); + 
l_ctx_out.conj_id_overflow = false; + lflow_run(&l_ctx_in, &l_ctx_out); + if (l_ctx_out.conj_id_overflow) { +@@ -2313,6 +2354,23 @@ flow_output_sb_load_balancer_handler(struct engine_node *node, void *data) return handled; } @@ -2567,18 +5823,17 @@ index 366fc9c06..288e2e12d 100644 struct ovn_controller_exit_args { bool *exiting; bool *restart; -@@ -2389,6 +2439,10 @@ main(int argc, char *argv[]) +@@ -2389,6 +2447,9 @@ main(int argc, char *argv[]) daemonize_complete(); + /* Register ofctrl seqno types. */ + ofctrl_seq_type_nb_cfg = ofctrl_seqno_add_type(); + -+ binding_init(); patch_init(); pinctrl_init(); lflow_init(); -@@ -2440,6 +2494,10 @@ main(int argc, char *argv[]) +@@ -2440,6 +2501,10 @@ main(int argc, char *argv[]) = ip_mcast_index_create(ovnsb_idl_loop.idl); struct ovsdb_idl_index *sbrec_igmp_group = igmp_group_index_create(ovnsb_idl_loop.idl); @@ -2589,7 +5844,7 @@ index 366fc9c06..288e2e12d 100644 ovsdb_idl_track_add_all(ovnsb_idl_loop.idl); ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, -@@ -2566,6 +2624,8 @@ main(int argc, char *argv[]) +@@ -2566,6 +2631,8 @@ main(int argc, char *argv[]) engine_add_input(&en_flow_output, &en_sb_dns, NULL); engine_add_input(&en_flow_output, &en_sb_load_balancer, flow_output_sb_load_balancer_handler); @@ -2598,7 +5853,14 @@ index 366fc9c06..288e2e12d 100644 engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL); engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL); -@@ -2624,6 +2684,7 @@ main(int argc, char *argv[]) +@@ -2619,11 +2686,13 @@ main(int argc, char *argv[]) + engine_get_internal_data(&en_flow_output); + struct ed_type_ct_zones *ct_zones_data = + engine_get_internal_data(&en_ct_zones); +- struct ed_type_runtime_data *runtime_data = NULL; ++ struct ed_type_runtime_data *runtime_data = ++ engine_get_internal_data(&en_runtime_data); + ofctrl_init(&flow_output_data->group_table, &flow_output_data->meter_table, get_ofctrl_probe_interval(ovs_idl_loop.idl)); @@ -2606,7 +5868,88 @@ index 366fc9c06..288e2e12d 100644 
unixctl_command_register("group-table-list", "", 0, 0, extend_table_list, -@@ -2832,11 +2893,13 @@ main(int argc, char *argv[]) +@@ -2643,7 +2712,15 @@ main(int argc, char *argv[]) + + unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd, + NULL); +- unixctl_command_register("flush-lflow-cache", "", 0, 0, flush_lflow_cache, ++ unixctl_command_register("lflow-cache/flush", "", 0, 0, ++ lflow_cache_flush_cmd, ++ &flow_output_data->pd); ++ /* Keep deprecated 'flush-lflow-cache' command for now. */ ++ unixctl_command_register("flush-lflow-cache", "[deprecated]", 0, 0, ++ lflow_cache_flush_cmd, ++ &flow_output_data->pd); ++ unixctl_command_register("lflow-cache/show-stats", "", 0, 0, ++ lflow_cache_show_stats_cmd, + &flow_output_data->pd); + + bool reset_ovnsb_idl_min_index = false; +@@ -2663,13 +2740,19 @@ main(int argc, char *argv[]) + unixctl_command_register("debug/delay-nb-cfg-report", "SECONDS", 1, 1, + debug_delay_nb_cfg_report, &delay_nb_cfg_report); + ++ unixctl_command_register("debug/dump-local-bindings", "", 0, 0, ++ debug_dump_local_bindings, ++ &runtime_data->lbinding_data); ++ + unsigned int ovs_cond_seqno = UINT_MAX; + unsigned int ovnsb_cond_seqno = UINT_MAX; + unsigned int ovnsb_expected_cond_seqno = UINT_MAX; + + struct controller_engine_ctx ctrl_engine_ctx = { +- .enable_lflow_cache = true ++ .lflow_cache = lflow_cache_create(), ++ .if_mgr = if_status_mgr_create(), + }; ++ struct if_status_mgr *if_mgr = ctrl_engine_ctx.if_mgr; + + char *ovn_version = ovn_get_internal_version(); + VLOG_INFO("OVN internal version is : [%s]", ovn_version); +@@ -2679,6 +2762,15 @@ main(int argc, char *argv[]) + restart = false; + bool sb_monitor_all = false; + while (!exiting) { ++ memory_run(); ++ if (memory_should_report()) { ++ struct simap usage = SIMAP_INITIALIZER(&usage); ++ ++ lflow_cache_get_memory_usage(ctrl_engine_ctx.lflow_cache, &usage); ++ memory_report(&usage); ++ simap_destroy(&usage); ++ } ++ + /* If we're paused just run the unixctl 
server and skip most of the + * processing loop. + */ +@@ -2703,8 +2795,7 @@ main(int argc, char *argv[]) + + update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, &sb_monitor_all, + &reset_ovnsb_idl_min_index, +- &ctrl_engine_ctx.enable_lflow_cache, +- &ovnsb_expected_cond_seqno); ++ &ctrl_engine_ctx, &ovnsb_expected_cond_seqno); + update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); + ofctrl_set_probe_interval(get_ofctrl_probe_interval(ovs_idl_loop.idl)); + +@@ -2741,6 +2832,16 @@ main(int argc, char *argv[]) + + if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl) && + northd_version_match) { ++ ++ /* Unconditionally remove all deleted lflows from the lflow ++ * cache. ++ */ ++ if (lflow_cache_is_enabled(ctrl_engine_ctx.lflow_cache)) { ++ lflow_handle_cached_flows( ++ ctrl_engine_ctx.lflow_cache, ++ sbrec_logical_flow_table_get(ovnsb_idl_loop.idl)); ++ } ++ + /* Contains the transport zones that this Chassis belongs to */ + struct sset transport_zones = SSET_INITIALIZER(&transport_zones); + sset_from_delimited_string(&transport_zones, +@@ -2832,11 +2933,13 @@ main(int argc, char *argv[]) sbrec_mac_binding_by_lport_ip, sbrec_igmp_group, sbrec_ip_multicast, @@ -2620,7 +5963,7 @@ index 366fc9c06..288e2e12d 100644 br_int, chassis, &runtime_data->local_datapaths, &runtime_data->active_tunnels); -@@ -2852,17 +2915,29 @@ main(int argc, char *argv[]) +@@ -2852,17 +2955,29 @@ main(int argc, char *argv[]) sb_monitor_all); } } @@ -2631,9 +5974,10 @@ index 366fc9c06..288e2e12d 100644 + ovnsb_idl_loop.idl), + ovnsb_cond_seqno, + ovnsb_expected_cond_seqno)); -+ if (runtime_data && ovs_idl_txn && ovnsb_idl_txn) { -+ binding_seqno_run(&runtime_data->local_bindings); -+ } ++ ++ struct local_binding_data *binding_data = ++ runtime_data ? 
&runtime_data->lbinding_data : NULL; ++ if_status_mgr_update(if_mgr, binding_data); + flow_output_data = engine_get_data(&en_flow_output); if (flow_output_data && ct_zones_data) { @@ -2648,23 +5992,176 @@ index 366fc9c06..288e2e12d 100644 engine_node_changed(&en_flow_output)); } + ofctrl_seqno_run(ofctrl_get_cur_cfg()); -+ if (runtime_data && ovs_idl_txn && ovnsb_idl_txn) { -+ binding_seqno_install(&runtime_data->local_bindings); -+ } ++ if_status_mgr_run(if_mgr, binding_data, !ovnsb_idl_txn, ++ !ovs_idl_txn); } } -@@ -2888,7 +2963,7 @@ main(int argc, char *argv[]) - } - - store_nb_cfg(ovnsb_idl_txn, ovs_idl_txn, chassis_private, -- br_int, delay_nb_cfg_report, ofctrl_get_cur_cfg()); -+ br_int, delay_nb_cfg_report); +@@ -2888,7 +3003,7 @@ main(int argc, char *argv[]) + } + + store_nb_cfg(ovnsb_idl_txn, ovs_idl_txn, chassis_private, +- br_int, delay_nb_cfg_report, ofctrl_get_cur_cfg()); ++ br_int, delay_nb_cfg_report); + + if (pending_pkt.conn) { + struct ed_type_addr_sets *as_data = +@@ -2960,6 +3075,7 @@ main(int argc, char *argv[]) + ovsdb_idl_track_clear(ovs_idl_loop.idl); + + loop_done: ++ memory_wait(); + poll_block(); + if (should_service_stop()) { + exiting = true; +@@ -3027,6 +3143,7 @@ loop_done: + ofctrl_destroy(); + pinctrl_destroy(); + patch_destroy(); ++ if_status_mgr_destroy(if_mgr); + + ovsdb_idl_loop_destroy(&ovs_idl_loop); + ovsdb_idl_loop_destroy(&ovnsb_idl_loop); +@@ -3207,19 +3324,32 @@ engine_recompute_cmd(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, + } + + static void +-flush_lflow_cache(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, +- const char *argv[] OVS_UNUSED, void *arg_) ++lflow_cache_flush_cmd(struct unixctl_conn *conn OVS_UNUSED, ++ int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, ++ void *arg_) + { + VLOG_INFO("User triggered lflow cache flush."); + struct flow_output_persistent_data *fo_pd = arg_; +- lflow_cache_destroy(&fo_pd->lflow_cache_map); +- lflow_cache_init(&fo_pd->lflow_cache_map); ++ 
lflow_cache_flush(fo_pd->lflow_cache); + fo_pd->conj_id_ofs = 1; + engine_set_force_recompute(true); + poll_immediate_wake(); + unixctl_command_reply(conn, NULL); + } + ++static void ++lflow_cache_show_stats_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *arg_) ++{ ++ struct flow_output_persistent_data *fo_pd = arg_; ++ struct lflow_cache *lc = fo_pd->lflow_cache; ++ struct ds ds = DS_EMPTY_INITIALIZER; ++ ++ lflow_cache_get_stats(lc, &ds); ++ unixctl_command_reply(conn, ds_cstr(&ds)); ++ ds_destroy(&ds); ++} ++ + static void + cluster_state_reset_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *idl_reset_) +@@ -3287,3 +3417,13 @@ debug_delay_nb_cfg_report(struct unixctl_conn *conn, int argc OVS_UNUSED, + unixctl_command_reply(conn, "no delay for nb_cfg report."); + } + } ++ ++static void ++debug_dump_local_bindings(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *local_bindings) ++{ ++ struct ds binding_data = DS_EMPTY_INITIALIZER; ++ binding_dump_local_bindings(local_bindings, &binding_data); ++ unixctl_command_reply(conn, ds_cstr(&binding_data)); ++ ds_destroy(&binding_data); ++} +diff --git a/controller/physical.c b/controller/physical.c +index fa5d0d692..c7090b351 100644 +--- a/controller/physical.c ++++ b/controller/physical.c +@@ -1160,6 +1160,11 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name, + + load_logical_ingress_metadata(binding, &zone_ids, ofpacts_p); + ++ if (!strcmp(binding->type, "localport")) { ++ /* mark the packet as incoming from a localport */ ++ put_load(1, MFF_LOG_FLAGS, MLF_LOCALPORT_BIT, 1, ofpacts_p); ++ } ++ + /* Resubmit to first logical ingress pipeline table. 
*/ + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p); + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, +@@ -1219,6 +1224,24 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name, + ofport, flow_table); + } + ++ /* Table 39, priority 160. ++ * ======================= ++ * ++ * Do not forward local traffic from a localport to a localnet port. ++ */ ++ if (!strcmp(binding->type, "localnet")) { ++ /* do not forward traffic from localport to localnet port */ ++ match_init_catchall(&match); ++ ofpbuf_clear(ofpacts_p); ++ match_set_metadata(&match, htonll(dp_key)); ++ match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key); ++ match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, ++ MLF_LOCALPORT, MLF_LOCALPORT); ++ ofctrl_add_flow(flow_table, OFTABLE_CHECK_LOOPBACK, 160, ++ binding->header_.uuid.parts[0], &match, ++ ofpacts_p, &binding->header_.uuid); ++ } ++ + } else if (!tun && !is_ha_remote) { + /* Remote port connected by localnet port */ + /* Table 33, priority 100. +@@ -1839,20 +1862,29 @@ physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx, + continue; + } + +- const struct local_binding *lb = +- local_binding_find(p_ctx->local_bindings, iface_id); +- +- if (!lb || !lb->pb) { +- continue; ++ const struct sbrec_port_binding *lb_pb = ++ local_binding_get_primary_pb(p_ctx->local_bindings, iface_id); ++ if (!lb_pb) { ++ /* For regular VIFs (e.g. lsp) the upcoming port-binding update ++ * will remove lfows related to the unclaimed ovs port. ++ * Localport is a special case and it needs to be managed here ++ * since the port is not binded and otherwise the related lfows ++ * will not be cleared removing the ovs port. ++ */ ++ lb_pb = lport_lookup_by_name(p_ctx->sbrec_port_binding_by_name, ++ iface_id); ++ if (!lb_pb || strcmp(lb_pb->type, "localport")) { ++ continue; ++ } + } + + int64_t ofport = iface_rec->n_ofport ? 
*iface_rec->ofport : 0; + if (ovsrec_interface_is_deleted(iface_rec)) { +- ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid); ++ ofctrl_remove_flows(flow_table, &lb_pb->header_.uuid); + simap_find_and_delete(&localvif_to_ofport, iface_id); + } else { + if (!ovsrec_interface_is_new(iface_rec)) { +- ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid); ++ ofctrl_remove_flows(flow_table, &lb_pb->header_.uuid); + } - if (pending_pkt.conn) { - struct ed_type_addr_sets *as_data = + simap_put(&localvif_to_ofport, iface_id, ofport); +@@ -1860,7 +1892,7 @@ physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx, + p_ctx->mff_ovn_geneve, p_ctx->ct_zones, + p_ctx->active_tunnels, + p_ctx->local_datapaths, +- lb->pb, p_ctx->chassis, ++ lb_pb, p_ctx->chassis, + flow_table, &ofpacts); + } + } diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 7e3abf0a4..3dc10389d 100644 +index 7e3abf0a4..523a45b9a 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -26,6 +26,7 @@ @@ -2986,12 +6483,12 @@ index 7e3abf0a4..3dc10389d 100644 /* Buffered "put_mac_binding" operation. */ -struct put_mac_binding { - struct hmap_node hmap_node; /* In 'put_mac_bindings'. */ -- + - /* Key. */ - uint32_t dp_key; - uint32_t port_key; - struct in6_addr ip_key; - +- - /* Value. */ - struct eth_addr mac; -}; @@ -3166,7 +6663,91 @@ index 7e3abf0a4..3dc10389d 100644 /* * Send gratuitous/reverse ARP for vif on localnet. -@@ -5525,7 +5663,8 @@ may_inject_pkts(void) +@@ -4102,6 +4240,12 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn, + struct shash *nat_addresses) + { + volatile struct garp_rarp_data *garp_rarp = NULL; ++ ++ /* Skip localports as they don't need to be announced */ ++ if (!strcmp(binding_rec->type, "localport")) { ++ return; ++ } ++ + /* Update GARP for NAT IP if it exists. 
Consider port bindings with type + * "l3gateway" for logical switch ports attached to gateway routers, and + * port bindings with type "patch" for logical switch ports attached to +@@ -4507,9 +4651,16 @@ ip_mcast_snoop_state_find(int64_t dp_key) + return NULL; + } + ++/* Updates the ip_mcast_snoop_cfg for a logical datapath specified by ++ * 'dp_key'. Also sets 'needs_flush' to 'true' if the config change should ++ * to trigger flushing of the existing IGMP_Groups. ++ * ++ * Returns 'true' if any changes happened to the configuration. ++ */ + static bool + ip_mcast_snoop_state_update(int64_t dp_key, +- const struct ip_mcast_snoop_cfg *cfg) ++ const struct ip_mcast_snoop_cfg *cfg, ++ bool *needs_flush) + OVS_REQUIRES(pinctrl_mutex) + { + bool notify = false; +@@ -4519,6 +4670,9 @@ ip_mcast_snoop_state_update(int64_t dp_key, + ms_state = ip_mcast_snoop_state_add(dp_key); + notify = true; + } else if (memcmp(cfg, &ms_state->cfg, sizeof *cfg)) { ++ if (ms_state->cfg.seq_no != cfg->seq_no) { ++ *needs_flush = true; ++ } + notify = true; + } + +@@ -4738,6 +4892,25 @@ ip_mcast_snoop_run(void) + } + } + ++/* Flushes all IGMP_Groups installed by the local chassis for the logical ++ * datapath specified by 'dp_key'. ++ */ ++static void ++ip_mcast_flush_groups(int64_t dp_key, const struct sbrec_chassis *chassis, ++ struct ovsdb_idl_index *sbrec_igmp_groups) ++{ ++ const struct sbrec_igmp_group *sbrec_igmp; ++ ++ SBREC_IGMP_GROUP_FOR_EACH_BYINDEX (sbrec_igmp, sbrec_igmp_groups) { ++ if (!sbrec_igmp->datapath || ++ sbrec_igmp->datapath->tunnel_key != dp_key || ++ sbrec_igmp->chassis != chassis) { ++ continue; ++ } ++ igmp_group_delete(sbrec_igmp); ++ } ++} ++ + /* + * This runs in the pinctrl main thread, so it has access to the southbound + * database. 
It reads the IP_Multicast table and updates the local multicast +@@ -4770,11 +4943,15 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn, + + int64_t dp_key = ip_mcast->datapath->tunnel_key; + struct ip_mcast_snoop_cfg cfg; ++ bool flush_groups = false; + + ip_mcast_snoop_cfg_load(&cfg, ip_mcast); +- if (ip_mcast_snoop_state_update(dp_key, &cfg)) { ++ if (ip_mcast_snoop_state_update(dp_key, &cfg, &flush_groups)) { + notify = true; + } ++ if (flush_groups) { ++ ip_mcast_flush_groups(dp_key, chassis, sbrec_igmp_groups); ++ } + } + + /* Then delete the old entries. */ +@@ -5525,7 +5702,8 @@ may_inject_pkts(void) !shash_is_empty(&send_garp_rarp_data) || ipv6_prefixd_should_inject() || !ovs_list_is_empty(&mcast_query_list) || @@ -3176,7 +6757,7 @@ index 7e3abf0a4..3dc10389d 100644 } static void -@@ -6312,6 +6451,665 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, +@@ -6312,6 +6490,665 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, } @@ -3842,7 +7423,7 @@ index 7e3abf0a4..3dc10389d 100644 static uint16_t get_random_src_port(void) { -@@ -6724,3 +7522,94 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, +@@ -6724,3 +7561,94 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, svc_mon->next_send_time = time_msec() + svc_mon->interval; } } @@ -3961,12 +7542,12 @@ index 4b101ec92..cc0a51984 100644 const struct ovsrec_bridge *, const struct sbrec_chassis *, const struct hmap *local_datapaths, const struct sset *active_tunnels); -diff --git a/controller/test-ofctrl-seqno.c b/controller/test-ofctrl-seqno.c +diff --git a/controller/test-lflow-cache.c b/controller/test-lflow-cache.c new file mode 100644 -index 000000000..fce88d4bd +index 000000000..6a1416197 --- /dev/null -+++ b/controller/test-ofctrl-seqno.c -@@ -0,0 +1,194 @@ ++++ b/controller/test-lflow-cache.c +@@ -0,0 +1,238 @@ +/* Copyright (c) 2021, Red Hat, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); @@ -3984,33 +7565,261 @@ index 000000000..fce88d4bd + +#include + ++#include "lib/uuid.h" ++#include "ovn/expr.h" +#include "tests/ovstest.h" -+#include "sort.h" ++#include "tests/test-utils.h" +#include "util.h" + -+#include "ofctrl-seqno.h" ++#include "lflow-cache.h" ++ ++/* Simulate 1KB large cache values. */ ++#define TEST_LFLOW_CACHE_VALUE_SIZE 1024 + +static void -+test_init(void) ++test_lflow_cache_add__(struct lflow_cache *lc, const char *op_type, ++ const struct uuid *lflow_uuid, ++ unsigned int conj_id_ofs, ++ struct expr *e) +{ -+ ofctrl_seqno_init(); ++ printf("ADD %s:\n", op_type); ++ printf(" conj-id-ofs: %u\n", conj_id_ofs); ++ ++ if (!strcmp(op_type, "conj-id")) { ++ lflow_cache_add_conj_id(lc, lflow_uuid, conj_id_ofs); ++ } else if (!strcmp(op_type, "expr")) { ++ lflow_cache_add_expr(lc, lflow_uuid, conj_id_ofs, expr_clone(e), ++ TEST_LFLOW_CACHE_VALUE_SIZE); ++ } else if (!strcmp(op_type, "matches")) { ++ struct hmap *matches = xmalloc(sizeof *matches); ++ ovs_assert(expr_to_matches(e, NULL, NULL, matches) == 0); ++ ovs_assert(hmap_count(matches) == 1); ++ lflow_cache_add_matches(lc, lflow_uuid, matches, ++ TEST_LFLOW_CACHE_VALUE_SIZE); ++ } else { ++ OVS_NOT_REACHED(); ++ } +} + -+static bool -+test_read_uint_value(struct ovs_cmdl_context *ctx, unsigned int index, -+ const char *descr, unsigned int *result) ++static void ++test_lflow_cache_lookup__(struct lflow_cache *lc, ++ const struct uuid *lflow_uuid) +{ -+ if (index >= ctx->argc) { -+ fprintf(stderr, "Missing %s argument\n", descr); -+ return false; ++ struct lflow_cache_value *lcv = lflow_cache_get(lc, lflow_uuid); ++ ++ printf("LOOKUP:\n"); ++ if (!lcv) { ++ printf(" not found\n"); ++ return; + } + -+ const char *arg = ctx->argv[index]; -+ if (!str_to_uint(arg, 10, result)) { -+ fprintf(stderr, "Invalid %s: %s\n", descr, arg); -+ return false; ++ printf(" conj_id_ofs: %"PRIu32"\n", lcv->conj_id_ofs); ++ switch 
(lcv->type) { ++ case LCACHE_T_CONJ_ID: ++ printf(" type: conj-id\n"); ++ break; ++ case LCACHE_T_EXPR: ++ printf(" type: expr\n"); ++ break; ++ case LCACHE_T_MATCHES: ++ printf(" type: matches\n"); ++ break; ++ case LCACHE_T_NONE: ++ OVS_NOT_REACHED(); ++ break; + } -+ return true; ++} ++ ++static void ++test_lflow_cache_delete__(struct lflow_cache *lc, ++ const struct uuid *lflow_uuid) ++{ ++ printf("DELETE\n"); ++ lflow_cache_delete(lc, lflow_uuid); ++} ++ ++static void ++test_lflow_cache_stats__(struct lflow_cache *lc) ++{ ++ struct ds ds = DS_EMPTY_INITIALIZER; ++ ++ lflow_cache_get_stats(lc, &ds); ++ printf("%s", ds_cstr(&ds)); ++ ds_destroy(&ds); ++} ++ ++static void ++test_lflow_cache_operations(struct ovs_cmdl_context *ctx) ++{ ++ struct lflow_cache *lc = lflow_cache_create(); ++ struct expr *e = expr_create_boolean(true); ++ bool enabled = !strcmp(ctx->argv[1], "true"); ++ unsigned int shift = 2; ++ unsigned int n_ops; ++ ++ lflow_cache_enable(lc, enabled, UINT32_MAX, UINT32_MAX); ++ test_lflow_cache_stats__(lc); ++ ++ if (!test_read_uint_value(ctx, shift++, "n_ops", &n_ops)) { ++ goto done; ++ } ++ ++ for (unsigned int i = 0; i < n_ops; i++) { ++ const char *op = test_read_value(ctx, shift++, "op"); ++ ++ if (!op) { ++ goto done; ++ } ++ ++ struct uuid lflow_uuid; ++ uuid_generate(&lflow_uuid); ++ ++ if (!strcmp(op, "add")) { ++ const char *op_type = test_read_value(ctx, shift++, "op_type"); ++ if (!op_type) { ++ goto done; ++ } ++ ++ unsigned int conj_id_ofs; ++ if (!test_read_uint_value(ctx, shift++, "conj-id-ofs", ++ &conj_id_ofs)) { ++ goto done; ++ } ++ ++ test_lflow_cache_add__(lc, op_type, &lflow_uuid, conj_id_ofs, e); ++ test_lflow_cache_lookup__(lc, &lflow_uuid); ++ } else if (!strcmp(op, "add-del")) { ++ const char *op_type = test_read_value(ctx, shift++, "op_type"); ++ if (!op_type) { ++ goto done; ++ } ++ ++ unsigned int conj_id_ofs; ++ if (!test_read_uint_value(ctx, shift++, "conj-id-ofs", ++ &conj_id_ofs)) { ++ goto done; ++ } ++ ++ 
test_lflow_cache_add__(lc, op_type, &lflow_uuid, conj_id_ofs, e); ++ test_lflow_cache_lookup__(lc, &lflow_uuid); ++ test_lflow_cache_delete__(lc, &lflow_uuid); ++ test_lflow_cache_lookup__(lc, &lflow_uuid); ++ } else if (!strcmp(op, "enable")) { ++ unsigned int limit; ++ unsigned int mem_limit_kb; ++ if (!test_read_uint_value(ctx, shift++, "limit", &limit)) { ++ goto done; ++ } ++ if (!test_read_uint_value(ctx, shift++, "mem-limit", ++ &mem_limit_kb)) { ++ goto done; ++ } ++ printf("ENABLE\n"); ++ lflow_cache_enable(lc, true, limit, mem_limit_kb); ++ } else if (!strcmp(op, "disable")) { ++ printf("DISABLE\n"); ++ lflow_cache_enable(lc, false, UINT32_MAX, UINT32_MAX); ++ } else if (!strcmp(op, "flush")) { ++ printf("FLUSH\n"); ++ lflow_cache_flush(lc); ++ } else { ++ OVS_NOT_REACHED(); ++ } ++ test_lflow_cache_stats__(lc); ++ } ++done: ++ lflow_cache_destroy(lc); ++ expr_destroy(e); ++} ++ ++static void ++test_lflow_cache_negative(struct ovs_cmdl_context *ctx OVS_UNUSED) ++{ ++ lflow_cache_flush(NULL); ++ lflow_cache_destroy(NULL); ++ lflow_cache_enable(NULL, true, UINT32_MAX, UINT32_MAX); ++ ovs_assert(!lflow_cache_is_enabled(NULL)); ++ ++ struct ds ds = DS_EMPTY_INITIALIZER; ++ lflow_cache_get_stats(NULL, &ds); ++ ovs_assert(!strcmp(ds_cstr_ro(&ds), "Invalid arguments.")); ++ lflow_cache_get_stats(NULL, NULL); ++ ds_destroy(&ds); ++ ++ struct lflow_cache *lcs[] = { ++ NULL, ++ lflow_cache_create(), ++ }; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(lcs); i++) { ++ struct expr *e = expr_create_boolean(true); ++ struct hmap *matches = xmalloc(sizeof *matches); ++ ++ ovs_assert(expr_to_matches(e, NULL, NULL, matches) == 0); ++ ovs_assert(hmap_count(matches) == 1); ++ ++ lflow_cache_add_conj_id(lcs[i], NULL, 0); ++ lflow_cache_add_expr(lcs[i], NULL, 0, NULL, 0); ++ lflow_cache_add_expr(lcs[i], NULL, 0, e, expr_size(e)); ++ lflow_cache_add_matches(lcs[i], NULL, NULL, 0); ++ lflow_cache_add_matches(lcs[i], NULL, matches, ++ TEST_LFLOW_CACHE_VALUE_SIZE); ++ 
lflow_cache_destroy(lcs[i]); ++ } ++} ++ ++static void ++test_lflow_cache_main(int argc, char *argv[]) ++{ ++ set_program_name(argv[0]); ++ static const struct ovs_cmdl_command commands[] = { ++ {"lflow_cache_operations", NULL, 3, INT_MAX, ++ test_lflow_cache_operations, OVS_RO}, ++ {"lflow_cache_negative", NULL, 0, 0, ++ test_lflow_cache_negative, OVS_RO}, ++ {NULL, NULL, 0, 0, NULL, OVS_RO}, ++ }; ++ struct ovs_cmdl_context ctx; ++ ctx.argc = argc - 1; ++ ctx.argv = argv + 1; ++ ovs_cmdl_run_command(&ctx, commands); ++} ++ ++OVSTEST_REGISTER("test-lflow-cache", test_lflow_cache_main); +diff --git a/controller/test-ofctrl-seqno.c b/controller/test-ofctrl-seqno.c +new file mode 100644 +index 000000000..b96da9d2f +--- /dev/null ++++ b/controller/test-ofctrl-seqno.c +@@ -0,0 +1,178 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++#include ++ ++#include "tests/ovstest.h" ++#include "tests/test-utils.h" ++#include "sort.h" ++#include "util.h" ++ ++#include "ofctrl-seqno.h" ++ ++static void ++test_init(void) ++{ ++ ofctrl_seqno_init(); +} + +static int @@ -4161,6 +7970,44 @@ index 000000000..fce88d4bd +} + +OVSTEST_REGISTER("test-ofctrl-seqno", test_ofctrl_seqno_main); +diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c +index db9ef88da..18e37a31f 100644 +--- a/ic/ovn-ic.c ++++ b/ic/ovn-ic.c +@@ -33,7 +33,9 @@ + #include "lib/ovn-nb-idl.h" + #include "lib/ovn-sb-idl.h" + #include "lib/ovn-util.h" ++#include "memory.h" + #include "openvswitch/poll-loop.h" ++#include "simap.h" + #include "smap.h" + #include "sset.h" + #include "stream.h" +@@ -1653,6 +1655,15 @@ main(int argc, char *argv[]) + state.had_lock = false; + state.paused = false; + while (!exiting) { ++ memory_run(); ++ if (memory_should_report()) { ++ struct simap usage = SIMAP_INITIALIZER(&usage); ++ ++ /* Nothing special to report yet. */ ++ memory_report(&usage); ++ simap_destroy(&usage); ++ } ++ + if (!state.paused) { + if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) && + !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl)) +@@ -1727,6 +1738,7 @@ main(int argc, char *argv[]) + + unixctl_server_run(unixctl); + unixctl_server_wait(unixctl); ++ memory_wait(); + if (exiting) { + poll_immediate_wake(); + } diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 9c1ebf4aa..040213177 100644 --- a/include/ovn/actions.h @@ -4252,14 +8099,22 @@ index 54b0e2c0e..582241a57 100644 include/ovn/lex.h \ include/ovn/logical-fields.h diff --git a/include/ovn/expr.h b/include/ovn/expr.h -index 0a83ec7a8..c2c821818 100644 +index 0a83ec7a8..032370058 100644 --- a/include/ovn/expr.h +++ b/include/ovn/expr.h -@@ -477,6 +477,7 @@ uint32_t expr_to_matches(const struct expr *, +@@ -413,6 +413,7 @@ expr_from_node(const struct ovs_list *node) + + void expr_format(const struct expr *, struct ds *); + void expr_print(const struct expr *); ++size_t 
expr_size(const struct expr *); + struct expr *expr_parse(struct lexer *, const struct shash *symtab, + const struct shash *addr_sets, + const struct shash *port_groups, +@@ -477,6 +478,7 @@ uint32_t expr_to_matches(const struct expr *, const void *aux, struct hmap *matches); void expr_matches_destroy(struct hmap *matches); -+void expr_matches_prepare(struct hmap *matches, uint32_t conj_id_ofs); ++size_t expr_matches_prepare(struct hmap *matches, uint32_t conj_id_ofs); void expr_matches_print(const struct hmap *matches, FILE *); /* Action parsing helper. */ @@ -4292,7 +8147,7 @@ index 000000000..10ee46fcd + +#endif diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h -index aee474856..017176f98 100644 +index aee474856..ef97117b9 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -44,7 +44,13 @@ enum ovn_controller_event { @@ -4310,21 +8165,30 @@ index aee474856..017176f98 100644 #define MFF_N_LOG_REGS 10 void ovn_init_symtab(struct shash *symtab); -@@ -59,6 +65,7 @@ enum mff_log_flags_bits { +@@ -59,6 +65,9 @@ enum mff_log_flags_bits { MLF_NESTED_CONTAINER_BIT = 5, MLF_LOOKUP_MAC_BIT = 6, MLF_LOOKUP_LB_HAIRPIN_BIT = 7, + MLF_LOOKUP_FDB_BIT = 8, ++ MLF_SKIP_SNAT_FOR_LB_BIT = 9, ++ MLF_LOCALPORT_BIT = 10, }; /* MFF_LOG_FLAGS_REG flag assignments */ -@@ -92,6 +99,9 @@ enum mff_log_flags { +@@ -92,6 +101,16 @@ enum mff_log_flags { MLF_LOOKUP_MAC = (1 << MLF_LOOKUP_MAC_BIT), MLF_LOOKUP_LB_HAIRPIN = (1 << MLF_LOOKUP_LB_HAIRPIN_BIT), + + /* Indicate that the lookup in the fdb table was successful. */ + MLF_LOOKUP_FDB = (1 << MLF_LOOKUP_FDB_BIT), ++ ++ /* Indicate that a packet must not SNAT in the gateway router when ++ * load-balancing has taken place. 
*/ ++ MLF_SKIP_SNAT_FOR_LB = (1 << MLF_SKIP_SNAT_FOR_LB_BIT), ++ ++ /* Indicate the packet has been received from a localport */ ++ MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT), }; /* OVN logical fields @@ -4617,36 +8481,233 @@ index fbaeb34bc..b3433f49e 100644 } else { lexer_syntax_error(ctx->lexer, "expecting action"); } -diff --git a/lib/expr.c b/lib/expr.c -index 4566d9110..796e88ac7 100644 ---- a/lib/expr.c -+++ b/lib/expr.c -@@ -3125,6 +3125,25 @@ expr_to_matches(const struct expr *expr, - return n_conjs; +diff --git a/lib/expr.c b/lib/expr.c +index 4566d9110..7b3d3ddb3 100644 +--- a/lib/expr.c ++++ b/lib/expr.c +@@ -471,6 +471,36 @@ expr_print(const struct expr *e) + ds_destroy(&output); + } + ++/* Expr Size. */ ++size_t ++expr_size(const struct expr *expr) { ++ size_t total_sz = sizeof *expr; ++ const struct expr *subexpr; ++ ++ switch (expr->type) { ++ case EXPR_T_CMP: ++ return total_sz + (expr->cmp.symbol->width ++ ? 0 ++ : strlen(expr->cmp.string)); ++ ++ case EXPR_T_AND: ++ case EXPR_T_OR: ++ LIST_FOR_EACH (subexpr, node, &expr->andor) { ++ total_sz += expr_size(subexpr); ++ } ++ return total_sz; ++ ++ case EXPR_T_BOOLEAN: ++ return total_sz; ++ ++ case EXPR_T_CONDITION: ++ return total_sz + strlen(expr->cond.string); ++ ++ default: ++ OVS_NOT_REACHED(); ++ } ++} ++ + /* Parsing. */ + + #define MAX_PAREN_DEPTH 100 +@@ -2422,7 +2452,7 @@ crush_and_numeric(struct expr *expr, const struct expr_symbol *symbol) + free(or); + return cmp; + } else { +- return or; ++ return crush_cmps(or, symbol); + } + } else { + /* Transform "x && (a0 || a1) && (b0 || b1) && ..." into +@@ -3125,6 +3155,32 @@ expr_to_matches(const struct expr *expr, + return n_conjs; + } + ++/* Prepares the expr matches in the hmap 'matches' by updating the ++ * conj id offsets specified in 'conj_id_ofs'. ++ * ++ * Returns the total size (in bytes) of the matches data structure, including ++ * individual match entries. 
++ */ ++size_t ++expr_matches_prepare(struct hmap *matches, uint32_t conj_id_ofs) ++{ ++ size_t total_size = sizeof *matches; ++ struct expr_match *m; ++ ++ HMAP_FOR_EACH (m, hmap_node, matches) { ++ if (m->match.wc.masks.conj_id) { ++ m->match.flow.conj_id += conj_id_ofs; ++ } ++ ++ for (size_t i = 0; i < m->n; i++) { ++ struct cls_conjunction *src = &m->conjunctions[i]; ++ src->id += conj_id_ofs; ++ } ++ total_size += sizeof *m + m->allocated * sizeof *m->conjunctions; ++ } ++ return total_size; ++} ++ + /* Destroys all of the 'struct expr_match'es in 'matches', as well as the + * 'matches' hmap itself. */ + void +@@ -3132,6 +3188,10 @@ expr_matches_destroy(struct hmap *matches) + { + struct expr_match *m; + ++ if (!matches) { ++ return; ++ } ++ + HMAP_FOR_EACH_POP (m, hmap_node, matches) { + free(m->conjunctions); + free(m); +diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c +index 916dbbe39..a6337a1d9 100644 +--- a/lib/inc-proc-eng.c ++++ b/lib/inc-proc-eng.c +@@ -27,6 +27,7 @@ + #include "openvswitch/hmap.h" + #include "openvswitch/vlog.h" + #include "inc-proc-eng.h" ++#include "unixctl.h" + + VLOG_DEFINE_THIS_MODULE(inc_proc_eng); + +@@ -102,6 +103,40 @@ engine_get_nodes(struct engine_node *node, size_t *n_count) + return engine_topo_sort(node, NULL, n_count, &n_size); + } + ++static void ++engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED) ++{ ++ for (size_t i = 0; i < engine_n_nodes; i++) { ++ struct engine_node *node = engine_nodes[i]; ++ ++ memset(&node->stats, 0, sizeof node->stats); ++ } ++ unixctl_command_reply(conn, NULL); ++} ++ ++static void ++engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED) ++{ ++ struct ds dump = DS_EMPTY_INITIALIZER; ++ ++ for (size_t i = 0; i < engine_n_nodes; i++) { ++ struct engine_node *node = engine_nodes[i]; ++ ++ ds_put_format(&dump, ++ "Node: %s\n" ++ "- recompute: 
%12"PRIu64"\n" ++ "- compute: %12"PRIu64"\n" ++ "- abort: %12"PRIu64"\n", ++ node->name, node->stats.recompute, ++ node->stats.compute, node->stats.abort); ++ } ++ unixctl_command_reply(conn, ds_cstr(&dump)); ++ ++ ds_destroy(&dump); ++} ++ + void + engine_init(struct engine_node *node, struct engine_arg *arg) + { +@@ -115,6 +150,11 @@ engine_init(struct engine_node *node, struct engine_arg *arg) + engine_nodes[i]->data = NULL; + } + } ++ ++ unixctl_command_register("inc-engine/show-stats", "", 0, 0, ++ engine_dump_stats, NULL); ++ unixctl_command_register("inc-engine/clear-stats", "", 0, 0, ++ engine_clear_stats, NULL); + } + + void +@@ -288,6 +328,7 @@ engine_recompute(struct engine_node *node, bool forced, bool allowed) + + /* Run the node handler which might change state. */ + node->run(node, node->data); ++ node->stats.recompute++; + } + + /* Return true if the node could be computed, false otherwise. */ +@@ -312,6 +353,8 @@ engine_compute(struct engine_node *node, bool recompute_allowed) + } + } + } ++ node->stats.compute++; ++ + return true; } -+/* Prepares the expr matches in the hmap 'matches' by updating the -+ * conj id offsets specified in 'conj_id_ofs'. -+ */ -+void -+expr_matches_prepare(struct hmap *matches, uint32_t conj_id_ofs) -+{ -+ struct expr_match *m; -+ HMAP_FOR_EACH (m, hmap_node, matches) { -+ if (m->match.wc.masks.conj_id) { -+ m->match.flow.conj_id += conj_id_ofs; -+ } +@@ -321,6 +364,7 @@ engine_run_node(struct engine_node *node, bool recompute_allowed) + if (!node->n_inputs) { + /* Run the node handler which might change state. 
*/ + node->run(node, node->data); ++ node->stats.recompute++; + return; + } + +@@ -377,6 +421,7 @@ engine_run(bool recompute_allowed) + engine_run_node(engine_nodes[i], recompute_allowed); + + if (engine_nodes[i]->state == EN_ABORTED) { ++ engine_nodes[i]->stats.abort++; + engine_run_aborted = true; + return; + } +@@ -393,6 +438,7 @@ engine_need_run(void) + } + + engine_nodes[i]->run(engine_nodes[i], engine_nodes[i]->data); ++ engine_nodes[i]->stats.recompute++; + VLOG_DBG("input node: %s, state: %s", engine_nodes[i]->name, + engine_node_state_name[engine_nodes[i]->state]); + if (engine_nodes[i]->state == EN_UPDATED) { +diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h +index 857234677..7e9f5bb70 100644 +--- a/lib/inc-proc-eng.h ++++ b/lib/inc-proc-eng.h +@@ -107,6 +107,12 @@ enum engine_node_state { + EN_STATE_MAX, + }; + ++struct engine_stats { ++ uint64_t recompute; ++ uint64_t compute; ++ uint64_t abort; ++}; + -+ for (size_t i = 0; i < m->n; i++) { -+ struct cls_conjunction *src = &m->conjunctions[i]; -+ src->id += conj_id_ofs; -+ } -+ } -+} + struct engine_node { + /* A unique name for each node. */ + char *name; +@@ -154,6 +160,9 @@ struct engine_node { + /* Method to clear up tracked data maintained by the engine node in the + * engine 'data'. It may be NULL. */ + void (*clear_tracked_data)(void *tracked_data); + - /* Destroys all of the 'struct expr_match'es in 'matches', as well as the - * 'matches' hmap itself. */ - void ++ /* Engine stats. */ ++ struct engine_stats stats; + }; + + /* Initialize the data for the engine nodes. 
It calls each node's diff --git a/lib/lb.c b/lib/lb.c index a90042e58..f305e9a87 100644 --- a/lib/lb.c @@ -4770,6 +8831,21 @@ index 6644ad0d8..9a78c72f3 100644 }; struct ovn_controller_lb *ovn_controller_lb_create( +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index 9d08b44c2..72853013e 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -121,6 +121,10 @@ ovn_init_symtab(struct shash *symtab) + MLF_FORCE_SNAT_FOR_LB_BIT); + expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL, + flags_str); ++ snprintf(flags_str, sizeof flags_str, "flags[%d]", ++ MLF_SKIP_SNAT_FOR_LB_BIT); ++ expr_symtab_add_subfield(symtab, "flags.skip_snat_for_lb", NULL, ++ flags_str); + + /* Connection tracking state. */ + expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false, diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h index c84a0e7a9..d00982449 100644 --- a/lib/ovn-l7.h @@ -4929,7 +9005,7 @@ index 679f47a97..40ecafe57 100644 + #endif diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index a9a3a9f4f..55b1c9655 100644 +index a9a3a9f4f..ace16281c 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -307,7 +307,73 @@ @@ -5016,7 +9092,25 @@ index a9a3a9f4f..55b1c9655 100644

This table prepares flows for possible stateful load balancing processing -@@ -399,7 +465,7 @@ +@@ -341,12 +407,13 @@ + it contains a priority-110 flow to move IPv6 Neighbor Discovery and MLD + traffic to the next table. If load balancing rules with virtual IP + addresses (and ports) are configured in OVN_Northbound +- database for alogical switch datapath, a priority-100 flow is added ++ database for a logical switch datapath, a priority-100 flow is added + with the match ip to match on IP packets and sets the action +- reg0[0] = 1; next; to act as a hint for table ++ reg0[2] = 1; next; to act as a hint for table + Pre-stateful to send IP packets to the connection tracker +- for packet de-fragmentation before eventually advancing to ingress +- table LB. ++ for packet de-fragmentation (and to possibly do DNAT for already ++ established load balanced traffic) before eventually advancing to ingress ++ table Stateful. + If controller_event has been enabled and load balancing rules with + empty backends have been added in OVN_Northbound, a 130 flow + is added to trigger ovn-controller events whenever the chassis receives a +@@ -399,23 +466,51 @@ logical router datapath to logical switch datapath.

@@ -5025,9 +9119,43 @@ index a9a3a9f4f..55b1c9655 100644

This table prepares flows for all possible stateful processing -@@ -410,12 +476,13 @@ - ct_next; action. + in next tables. It contains a priority-0 flow that simply moves +- traffic to the next table. A priority-100 flow sends the packets to +- connection tracker based on a hint provided by the previous tables +- (with a match for reg0[0] == 1) by using the +- ct_next; action. ++ traffic to the next table.

++
    ++
  • ++ Priority-120 flows that send the packets to connection tracker using ++ ct_lb; as the action so that the already established ++ traffic destined to the load balancer VIP gets DNATted based on a hint ++ provided by the previous tables (with a match ++ for reg0[2] == 1 and on supported load balancer protocols ++ and address families). For IPv4 traffic the flows also load the ++ original destination IP and transport port in registers ++ reg1 and reg2. For IPv6 traffic the flows ++ also load the original destination IP and transport port in ++ registers xxreg1 and reg2. ++
  • ++ ++
  • ++ A priority-110 flow sends the packets to connection tracker based ++ on a hint provided by the previous tables ++ (with a match for reg0[2] == 1) by using the ++ ct_lb; action. This flow is added to handle ++ the traffic for load balancer VIPs whose protocol is not defined ++ (mainly for ICMP traffic). ++
  • ++ ++
  • ++ A priority-100 flow sends the packets to connection tracker based ++ on a hint provided by the previous tables ++ (with a match for reg0[0] == 1) by using the ++ ct_next; action. ++
  • ++
-

Ingress Table 6: from-lport ACL hints

+

Ingress Table 8: from-lport ACL hints

@@ -5041,8 +9169,28 @@ index a9a3a9f4f..55b1c9655 100644 The possible hints are:

    -@@ -489,7 +556,7 @@ +@@ -444,6 +539,14 @@ +

    + The table contains the following flows: +

    ++
      ++
    • ++ A priority-65535 flow to advance to the next table if the logical ++ switch has no ACLs configured, otherwise a ++ priority-0 flow to advance to the next table. ++
    • ++
    ++ +
      +
    • + A priority-7 flow that matches on packets that initiate a new session. +@@ -484,12 +587,9 @@ + This flow sets reg0[10] and then advances to the next + table.
    • +-
    • +- A priority-0 flow to advance to the next table. +-
    -

    Ingress table 7: from-lport ACLs

    @@ -5050,7 +9198,7 @@ index a9a3a9f4f..55b1c9655 100644

    Logical flows in this table closely reproduce those in the -@@ -518,8 +585,9 @@ +@@ -518,8 +618,9 @@ flows with the tcp_reset { output <-> inport; next(pipeline=egress,table=5);} @@ -5062,7 +9210,77 @@ index a9a3a9f4f..55b1c9655 100644

  • Other ACLs translate to drop; for new or untracked -@@ -597,7 +665,7 @@ +@@ -531,9 +632,14 @@ +
+ +

+- This table also contains a priority 0 flow with action +- next;, so that ACLs allow packets by default. If the +- logical datapath has a stateful ACL or a load balancer with VIP ++ This table contains a priority-65535 flow to advance to the next table ++ if the logical switch has no ACLs configured, otherwise a ++ priority-0 flow to advance to the next table so that ACLs allow ++ packets by default. ++

++ ++

++ If the logical datapath has a stateful ACL or a load balancer with VIP + configured, the following flows will also be added: +

+ +@@ -547,7 +653,7 @@ + + +
  • +- A priority-65535 flow that allows any traffic in the reply ++ A priority-65532 flow that allows any traffic in the reply + direction for a connection that has been committed to the + connection tracker (i.e., established flows), as long as + the committed flow does not have ct_label.blocked set. +@@ -560,19 +666,19 @@ +
  • + +
  • +- A priority-65535 flow that allows any traffic that is considered ++ A priority-65532 flow that allows any traffic that is considered + related to a committed flow in the connection tracker (e.g., an + ICMP Port Unreachable from a non-listening UDP port), as long + as the committed flow does not have ct_label.blocked set. +
  • + +
  • +- A priority-65535 flow that drops all traffic marked by the ++ A priority-65532 flow that drops all traffic marked by the + connection tracker as invalid. +
  • + +
  • +- A priority-65535 flow that drops all traffic in the reply direction ++ A priority-65532 flow that drops all traffic in the reply direction + with ct_label.blocked set meaning that the connection + should no longer be allowed due to a policy change. Packets + in the request direction are skipped here to let a newly created +@@ -580,11 +686,18 @@ +
  • + +
  • +- A priority-65535 flow that allows IPv6 Neighbor solicitation, ++ A priority-65532 flow that allows IPv6 Neighbor solicitation, + Neighbor discover, Router solicitation, Router advertisement and MLD + packets. +
  • ++ + ++

    ++ If the logical datapath has any ACL or a load balancer with VIP ++ configured, the following flow will also be added: ++

    ++ ++
      +
    • + A priority 34000 logical flow is added for each logical switch datapath + with the match eth.dst = E to allow the service +@@ -597,7 +710,7 @@
    @@ -5071,7 +9289,7 @@ index a9a3a9f4f..55b1c9655 100644

    Logical flows in this table closely reproduce those in the -@@ -619,7 +687,7 @@ +@@ -619,7 +732,7 @@ @@ -5080,25 +9298,42 @@ index a9a3a9f4f..55b1c9655 100644

    Logical flows in this table closely reproduce those in the -@@ -641,7 +709,7 @@ +@@ -641,33 +754,7 @@ -

    Ingress Table 10: LB

    -+

    Ingress Table 12: LB

    - -

    - It contains a priority-0 flow that simply moves traffic to the next -@@ -667,7 +735,7 @@ - connection.) -

    - +- +-

    +- It contains a priority-0 flow that simply moves traffic to the next +- table. +-

    +- +-

    +- A priority-65535 flow with the match +- inport == I for all logical switch +- datapaths to move traffic to the next table. Where I +- is the peer of a logical router port. This flow is added to +- skip the connection tracking of packets which enter from +- logical router datapath to logical switch datapath. +-

    +- +-

    +- For established connections a priority 65534 flow matches on +- ct.est && !ct.rel && !ct.new && +- !ct.inv and sets an action reg0[2] = 1; next; to act +- as a hint for table Stateful to send packets through +- connection tracker to NAT the packets. (The packet will automatically +- get DNATed to the same IP address as the first packet in that +- connection.) +-

    +- -

    Ingress Table 11: Stateful

    -+

    Ingress Table 13: Stateful

    ++

    Ingress Table 12: Stateful

    • -@@ -687,7 +755,11 @@ +@@ -687,7 +774,11 @@ of VIP. If health check is enabled, then args will only contain those endpoints whose service monitor status entry in OVN_Southbound db is either online or @@ -5111,7 +9346,7 @@ index a9a3a9f4f..55b1c9655 100644
    • For all the configured load balancing rules for a switch in -@@ -699,40 +771,54 @@ +@@ -699,40 +790,43 @@ VIP. The action on this flow is ct_lb(args), where args contains comma separated IP addresses of the same address family as VIP. @@ -5136,26 +9371,18 @@ index a9a3a9f4f..55b1c9655 100644 ct_commit; next; action based on a hint provided by the previous tables (with a match for reg0[1] == 1).
    • -
    • +-
    • - A priority-100 flow sends the packets to connection tracker using -+ Priority-100 flows that send the packets to connection tracker using - ct_lb; as the action based on a hint provided by the +- ct_lb; as the action based on a hint provided by the - previous tables (with a match for reg0[2] == 1). -+ previous tables (with a match for reg0[2] == 1 and -+ on supported load balancer protocols and address families). -+ For IPv4 traffic the flows also load the original destination -+ IP and transport port in registers reg1 and -+ reg2. For IPv6 traffic the flows also load the original -+ destination IP and transport port in registers xxreg1 and -+ reg2. -
    • +-
    • A priority-0 flow that simply moves traffic to the next table.
    -

    Ingress Table 12: Pre-Hairpin

    -+

    Ingress Table 14: Pre-Hairpin

    ++

    Ingress Table 13: Pre-Hairpin

    • If the logical switch has load balancer(s) configured, then a @@ -5182,12 +9409,12 @@ index a9a3a9f4f..55b1c9655 100644
    • -@@ -740,21 +826,30 @@ +@@ -740,21 +834,30 @@
    -

    Ingress Table 13: Nat-Hairpin

    -+

    Ingress Table 15: Nat-Hairpin

    ++

    Ingress Table 14: Nat-Hairpin

    • If the logical switch has load balancer(s) configured, then a @@ -5219,79 +9446,79 @@ index a9a3a9f4f..55b1c9655 100644 of hairpinned traffic (i.e., destination IP is VIP, source IP is the backend IP and source L4 port is backend port for L4 load balancers) and executes ct_snat and advances the -@@ -766,7 +861,7 @@ +@@ -766,7 +869,7 @@
    -

    Ingress Table 14: Hairpin

    -+

    Ingress Table 16: Hairpin

    ++

    Ingress Table 15: Hairpin

    • A priority-1 flow that hairpins traffic matched by non-default -@@ -779,7 +874,7 @@ +@@ -779,7 +882,7 @@
    -

    Ingress Table 15: ARP/ND responder

    -+

    Ingress Table 17: ARP/ND responder

    ++

    Ingress Table 16: ARP/ND responder

    This table implements ARP/ND responder in a logical switch for known -@@ -1069,7 +1164,7 @@ output; +@@ -1069,7 +1172,7 @@ output; -

    Ingress Table 16: DHCP option processing

    -+

    Ingress Table 18: DHCP option processing

    ++

    Ingress Table 17: DHCP option processing

    This table adds the DHCPv4 options to a DHCPv4 packet from the -@@ -1130,7 +1225,7 @@ next; +@@ -1130,7 +1233,7 @@ next; -

    Ingress Table 17: DHCP responses

    -+

    Ingress Table 19: DHCP responses

    ++

    Ingress Table 18: DHCP responses

    This table implements DHCP responder for the DHCP replies generated by -@@ -1211,7 +1306,7 @@ output; +@@ -1211,7 +1314,7 @@ output; -

    Ingress Table 18 DNS Lookup

    -+

    Ingress Table 20 DNS Lookup

    ++

    Ingress Table 19 DNS Lookup

    This table looks up and resolves the DNS names to the corresponding -@@ -1240,7 +1335,7 @@ reg0[4] = dns_lookup(); next; +@@ -1240,7 +1343,7 @@ reg0[4] = dns_lookup(); next; -

    Ingress Table 19 DNS Responses

    -+

    Ingress Table 21 DNS Responses

    ++

    Ingress Table 20 DNS Responses

    This table implements DNS responder for the DNS replies generated by -@@ -1275,7 +1370,7 @@ output; +@@ -1275,7 +1378,7 @@ output; -

    Ingress table 20 External ports

    -+

    Ingress table 22 External ports

    ++

    Ingress table 21 External ports

    Traffic from the external logical ports enter the ingress -@@ -1318,7 +1413,7 @@ output; +@@ -1318,7 +1421,7 @@ output; -

    Ingress Table 21 Destination Lookup

    -+

    Ingress Table 23 Destination Lookup

    ++

    Ingress Table 22 Destination Lookup

    This table implements switching behavior. It contains these logical -@@ -1481,12 +1576,58 @@ output; +@@ -1481,12 +1584,58 @@ output;

  • @@ -5356,7 +9583,110 @@ index a9a3a9f4f..55b1c9655 100644
  • -@@ -1926,6 +2067,27 @@ next; +@@ -1498,9 +1647,11 @@ output; + Moreover it contains a priority-110 flow to move IPv6 Neighbor Discovery + traffic to the next table. If any load balancing rules exist for the + datapath, a priority-100 flow is added with a match of ip +- and action of reg0[0] = 1; next; to act as a hint for ++ and action of reg0[2] = 1; next; to act as a hint for + table Pre-stateful to send IP packets to the connection +- tracker for packet de-fragmentation. ++ tracker for packet de-fragmentation and possibly DNAT the destination ++ VIP to one of the selected backend for already commited load balanced ++ traffic. +

    + +

    +@@ -1542,20 +1693,39 @@ output; +

    Egress Table 2: Pre-stateful

    + +

    +- This is similar to ingress table Pre-stateful. ++ This is similar to ingress table Pre-stateful. This table ++ adds the below 3 logical flows. +

    + +-

    Egress Table 3: LB

    +-

    +- This is similar to ingress table LB. +-

    ++
      ++
    • ++ A Priority-120 flow that send the packets to connection tracker using ++ ct_lb; as the action so that the already established ++ traffic gets unDNATted from the backend IP to the load balancer VIP ++ based on a hint provided by the previous tables with a match ++ for reg0[2] == 1. If the packet was not DNATted earlier, ++ then ct_lb functions like ct_next. ++
    • ++ ++
    • ++ A priority-100 flow sends the packets to connection tracker based ++ on a hint provided by the previous tables ++ (with a match for reg0[0] == 1) by using the ++ ct_next; action. ++
    • + +-

      Egress Table 4: from-lport ACL hints

      ++
    • ++ A priority-0 flow that matches all packets to advance to the next ++ table. ++
    • ++
    ++ ++

    Egress Table 3: from-lport ACL hints

    +

    + This is similar to ingress table ACL hints. +

    + +-

    Egress Table 5: to-lport ACLs

    ++

    Egress Table 4: to-lport ACLs

    + +

    + This is similar to ingress table ACLs except for +@@ -1592,28 +1762,28 @@ output; + + + +-

    Egress Table 6: to-lport QoS Marking

    ++

    Egress Table 5: to-lport QoS Marking

    + +

    + This is similar to ingress table QoS marking except + they apply to to-lport QoS rules. +

    + +-

    Egress Table 7: to-lport QoS Meter

    ++

    Egress Table 6: to-lport QoS Meter

    + +

    + This is similar to ingress table QoS meter except + they apply to to-lport QoS rules. +

    + +-

    Egress Table 8: Stateful

    ++

    Egress Table 7: Stateful

    + +

    + This is similar to ingress table Stateful except that + there are no rules added for load balancing new connections. +

    + +-

    Egress Table 9: Egress Port Security - IP

    ++

    Egress Table 8: Egress Port Security - IP

    + +

    + This is similar to the port security logic in table +@@ -1623,7 +1793,7 @@ output; + ip4.src and ip6.src +

    + +-

    Egress Table 10: Egress Port Security - L2

    ++

    Egress Table 9: Egress Port Security - L2

    + +

    + This is similar to the ingress port security logic in ingress table +@@ -1926,6 +2096,27 @@ next;

    @@ -5384,7 +9714,17 @@ index a9a3a9f4f..55b1c9655 100644
  • L3 admission control: A priority-100 flow drops packets that match -@@ -2449,6 +2611,16 @@ icmp6 { +@@ -2121,8 +2312,7 @@ eth.src = xreg0[0..47]; + arp.op = 2; /* ARP reply. */ + arp.tha = arp.sha; + arp.sha = xreg0[0..47]; +-arp.tpa = arp.spa; +-arp.spa = A; ++arp.tpa <-> arp.spa; + outport = inport; + flags.loopback = 1; + output; +@@ -2449,6 +2639,16 @@ icmp6 { with an action ct_snat; .

    @@ -5401,10 +9741,47 @@ index a9a3a9f4f..55b1c9655 100644

    If the Gateway router has been configured to force SNAT any previously load-balanced packets to B, a priority-100 flow -@@ -2592,6 +2764,15 @@ icmp6 { +@@ -2548,7 +2748,11 @@ icmp6 { + (and optional port numbers) to load balance to. If the router is + configured to force SNAT any load-balanced packets, the above action + will be replaced by flags.force_snat_for_lb = 1; +- ct_lb(args);. If health check is enabled, then ++ ct_lb(args);. ++ If the load balancing rule is configured with skip_snat ++ set to true, the above action will be replaced by ++ flags.skip_snat_for_lb = 1; ct_lb(args);. ++ If health check is enabled, then + args will only contain those endpoints whose service + monitor status entry in OVN_Southbound db is + either online or empty. +@@ -2565,6 +2769,9 @@ icmp6 { + with an action of ct_dnat;. If the router is + configured to force SNAT any load-balanced packets, the above action + will be replaced by flags.force_snat_for_lb = 1; ct_dnat;. ++ If the load balancing rule is configured with skip_snat ++ set to true, the above action will be replaced by ++ flags.skip_snat_for_lb = 1; ct_dnat;. +

  • + +
  • +@@ -2579,6 +2786,9 @@ icmp6 { + to force SNAT any load-balanced packets, the above action will be + replaced by flags.force_snat_for_lb = 1; + ct_lb(args);. ++ If the load balancing rule is configured with skip_snat ++ set to true, the above action will be replaced by ++ flags.skip_snat_for_lb = 1; ct_lb(args);. +
  • + +
  • +@@ -2591,6 +2801,18 @@ icmp6 { + If the router is configured to force SNAT any load-balanced packets, the above action will be replaced by flags.force_snat_for_lb = 1; ct_dnat;. -
  • ++ If the load balancing rule is configured with skip_snat ++ set to true, the above action will be replaced by ++ flags.skip_snat_for_lb = 1; ct_dnat;. ++ + +
  • + If the load balancer is created with --reject option and @@ -5413,11 +9790,10 @@ index a9a3a9f4f..55b1c9655 100644 + whenever an incoming packet is received for this load-balancer. + Please note using --reject option will disable + empty_lb SB controller event for this load balancer. -+
  • + -

    Ingress Table 6: DNAT on Gateway Routers

    -@@ -3022,14 +3203,36 @@ outport = P; +@@ -3022,14 +3244,36 @@ outport = P;
  • @@ -5456,7 +9832,7 @@ index a9a3a9f4f..55b1c9655 100644 flags.loopback = 1; next; -@@ -3053,7 +3256,51 @@ next; +@@ -3053,7 +3297,51 @@ next;

  • @@ -5509,7 +9885,7 @@ index a9a3a9f4f..55b1c9655 100644

    Any packet that reaches this table is an IP packet whose next-hop -@@ -3239,7 +3486,7 @@ next; +@@ -3239,7 +3527,7 @@ next; @@ -5518,7 +9894,7 @@ index a9a3a9f4f..55b1c9655 100644

    For distributed logical routers with distributed gateway port configured -@@ -3269,7 +3516,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; +@@ -3269,7 +3557,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; and advances to the next table.

    @@ -5527,7 +9903,7 @@ index a9a3a9f4f..55b1c9655 100644

    For distributed logical routers with distributed gateway port configured -@@ -3330,7 +3577,7 @@ icmp6 { +@@ -3330,7 +3618,7 @@ icmp6 { and advances to the next table.

    @@ -5536,7 +9912,7 @@ index a9a3a9f4f..55b1c9655 100644

    For distributed logical routers where one of the logical router -@@ -3370,7 +3617,7 @@ icmp6 { +@@ -3370,7 +3658,7 @@ icmp6 { @@ -5545,7 +9921,7 @@ index a9a3a9f4f..55b1c9655 100644

    In the common case where the Ethernet destination has been resolved, this -@@ -3546,6 +3793,32 @@ nd_ns { +@@ -3546,6 +3834,41 @@ nd_ns { flags.force_snat_for_dnat == 1 && ip with an action ct_snat(B);.

    @@ -5553,6 +9929,15 @@ index a9a3a9f4f..55b1c9655 100644 + +
  • +

    ++ If a load balancer configured to skip snat has been applied to ++ the Gateway router pipeline, a priority-120 flow matches ++ flags.skip_snat_for_lb == 1 && ip with an ++ action next;. ++

    ++
  • ++ ++
  • ++

    + If the Gateway router in the OVN Northbound database has been + configured to force SNAT a packet (that has been previously + load-balanced) using router IP (i.e If the Gateway router in the OVN Northbound database has been configured to force SNAT a packet (that has been previously -@@ -3553,6 +3826,9 @@ nd_ns { +@@ -3553,6 +3876,9 @@ nd_ns { flags.force_snat_for_lb == 1 && ip with an action ct_snat(B);.

    @@ -5588,7 +9973,7 @@ index a9a3a9f4f..55b1c9655 100644

    For each configuration in the OVN Northbound database, that asks to change the source IP address of a packet from an IP address of -@@ -3566,14 +3842,18 @@ nd_ns { +@@ -3566,14 +3892,18 @@ nd_ns { options, then the action would be ip4/6.src= (B).

    @@ -5607,7 +9992,7 @@ index a9a3a9f4f..55b1c9655 100644

    If the NAT rule has exempted_ext_ips set, then there is an additional flow configured at the priority + 1 of -@@ -3582,7 +3862,9 @@ nd_ns { +@@ -3582,7 +3912,9 @@ nd_ns { . This flow is used to bypass the ct_snat action for a packet which is destinted to exempted_ext_ips.

    @@ -5618,18 +10003,35 @@ index a9a3a9f4f..55b1c9655 100644 A priority-0 logical flow with match 1 has actions next;. diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 5a3227568..c81e3220c 100644 +index 5a3227568..e78a71728 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c -@@ -38,6 +38,7 @@ +@@ -37,10 +37,13 @@ + #include "lib/ovn-sb-idl.h" #include "lib/ovn-util.h" #include "lib/lb.h" ++#include "memory.h" #include "ovn/actions.h" +#include "ovn/features.h" #include "ovn/logical-fields.h" #include "packets.h" #include "openvswitch/poll-loop.h" -@@ -141,25 +142,28 @@ enum ovn_stage { ++#include "simap.h" + #include "smap.h" + #include "sset.h" + #include "svec.h" +@@ -100,6 +103,10 @@ static bool check_lsp_is_up; + static char svc_monitor_mac[ETH_ADDR_STRLEN + 1]; + static struct eth_addr svc_monitor_mac_ea; + ++/* If this option is 'true' northd will make use of ct.inv match fields. ++ * Otherwise, it will avoid using it. The default is true. */ ++static bool use_ct_inv_match = true; ++ + /* Default probe interval for NB and SB DB connections. 
*/ + #define DEFAULT_PROBE_INTERVAL_MSEC 5000 + static int northd_probe_interval_nb = 0; +@@ -141,38 +148,39 @@ enum ovn_stage { PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ @@ -5661,23 +10063,42 @@ index 5a3227568..c81e3220c 100644 + PIPELINE_STAGE(SWITCH, IN, ACL, 9, "ls_in_acl") \ + PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 10, "ls_in_qos_mark") \ + PIPELINE_STAGE(SWITCH, IN, QOS_METER, 11, "ls_in_qos_meter") \ -+ PIPELINE_STAGE(SWITCH, IN, LB, 12, "ls_in_lb") \ -+ PIPELINE_STAGE(SWITCH, IN, STATEFUL, 13, "ls_in_stateful") \ -+ PIPELINE_STAGE(SWITCH, IN, PRE_HAIRPIN, 14, "ls_in_pre_hairpin") \ -+ PIPELINE_STAGE(SWITCH, IN, NAT_HAIRPIN, 15, "ls_in_nat_hairpin") \ -+ PIPELINE_STAGE(SWITCH, IN, HAIRPIN, 16, "ls_in_hairpin") \ -+ PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 17, "ls_in_arp_rsp") \ -+ PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 18, "ls_in_dhcp_options") \ -+ PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 19, "ls_in_dhcp_response") \ -+ PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 20, "ls_in_dns_lookup") \ -+ PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 21, "ls_in_dns_response") \ -+ PIPELINE_STAGE(SWITCH, IN, EXTERNAL_PORT, 22, "ls_in_external_port") \ -+ PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 23, "ls_in_l2_lkup") \ -+ PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") \ ++ PIPELINE_STAGE(SWITCH, IN, STATEFUL, 12, "ls_in_stateful") \ ++ PIPELINE_STAGE(SWITCH, IN, PRE_HAIRPIN, 13, "ls_in_pre_hairpin") \ ++ PIPELINE_STAGE(SWITCH, IN, NAT_HAIRPIN, 14, "ls_in_nat_hairpin") \ ++ PIPELINE_STAGE(SWITCH, IN, HAIRPIN, 15, "ls_in_hairpin") \ ++ PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 16, "ls_in_arp_rsp") \ ++ PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 17, "ls_in_dhcp_options") \ ++ PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 18, "ls_in_dhcp_response") \ ++ PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 19, "ls_in_dns_lookup") \ ++ 
PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 20, "ls_in_dns_response") \ ++ PIPELINE_STAGE(SWITCH, IN, EXTERNAL_PORT, 21, "ls_in_external_port") \ ++ PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 22, "ls_in_l2_lkup") \ ++ PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 23, "ls_in_l2_unknown") \ \ /* Logical switch egress stages. */ \ PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ -@@ -188,11 +192,12 @@ enum ovn_stage { + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ +- PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \ +- PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 4, "ls_out_acl_hint") \ +- PIPELINE_STAGE(SWITCH, OUT, ACL, 5, "ls_out_acl") \ +- PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 6, "ls_out_qos_mark") \ +- PIPELINE_STAGE(SWITCH, OUT, QOS_METER, 7, "ls_out_qos_meter") \ +- PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 8, "ls_out_stateful") \ +- PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 9, "ls_out_port_sec_ip") \ +- PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 10, "ls_out_port_sec_l2") \ ++ PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ ++ PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ ++ PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \ ++ PIPELINE_STAGE(SWITCH, OUT, QOS_METER, 6, "ls_out_qos_meter") \ ++ PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 7, "ls_out_stateful") \ ++ PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 8, "ls_out_port_sec_ip") \ ++ PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 9, "ls_out_port_sec_l2") \ + \ + /* Logical router ingress stages. */ \ + PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ +@@ -188,11 +196,12 @@ enum ovn_stage { PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \ PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \ @@ -5695,7 +10116,7 @@ index 5a3227568..c81e3220c 100644 \ /* Logical router egress stages. 
*/ \ PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ -@@ -225,6 +230,12 @@ enum ovn_stage { +@@ -225,6 +234,12 @@ enum ovn_stage { #define REGBIT_ACL_HINT_ALLOW "reg0[8]" #define REGBIT_ACL_HINT_DROP "reg0[9]" #define REGBIT_ACL_HINT_BLOCK "reg0[10]" @@ -5708,7 +10129,7 @@ index 5a3227568..c81e3220c 100644 /* Register definitions for switches and routers. */ -@@ -259,12 +270,29 @@ enum ovn_stage { +@@ -259,12 +274,29 @@ enum ovn_stage { * OVS register usage: * * Logical Switch pipeline: @@ -5744,16 +10165,18 @@ index 5a3227568..c81e3220c 100644 * * Logical Router pipeline: * +-----+--------------------------+---+-----------------+---+---------------+ -@@ -608,6 +636,8 @@ struct ovn_datapath { +@@ -608,7 +640,10 @@ struct ovn_datapath { struct hmap port_tnlids; uint32_t port_key_hint; + bool has_stateful_acl; + bool has_lb_vip; bool has_unknown; ++ bool has_acls; /* IPAM data. */ -@@ -633,6 +663,7 @@ struct ovn_datapath { + struct ipam_info ipam_info; +@@ -633,6 +668,7 @@ struct ovn_datapath { struct lport_addresses dnat_force_snat_addrs; struct lport_addresses lb_force_snat_addrs; @@ -5761,17 +10184,7 @@ index 5a3227568..c81e3220c 100644 struct ovn_port **localnet_ports; size_t n_localnet_ports; -@@ -646,6 +677,9 @@ struct ovn_datapath { - struct hmap nb_pgs; - }; - -+static bool ls_has_stateful_acl(struct ovn_datapath *od); -+static bool ls_has_lb_vip(struct ovn_datapath *od); -+ - /* Contains a NAT entry with the external addresses pre-parsed. 
*/ - struct ovn_nat { - const struct nbrec_nat *nb; -@@ -723,14 +757,28 @@ init_nat_entries(struct ovn_datapath *od) +@@ -723,14 +759,28 @@ init_nat_entries(struct ovn_datapath *od) } } @@ -5808,7 +10221,7 @@ index 5a3227568..c81e3220c 100644 } } -@@ -872,6 +920,20 @@ ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) +@@ -872,6 +922,20 @@ ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) return NULL; } @@ -5829,7 +10242,7 @@ index 5a3227568..c81e3220c 100644 static bool ovn_datapath_is_stale(const struct ovn_datapath *od) { -@@ -1472,6 +1534,8 @@ struct ovn_port { +@@ -1472,6 +1536,8 @@ struct ovn_port { bool has_unknown; /* If the addresses have 'unknown' defined. */ @@ -5838,7 +10251,7 @@ index 5a3227568..c81e3220c 100644 /* The port's peer: * * - A switch port S of type "router" has a router port R as a peer, -@@ -1543,17 +1607,38 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port) +@@ -1543,17 +1609,38 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port) } } @@ -5880,7 +10293,7 @@ index 5a3227568..c81e3220c 100644 } /* Returns true if the logical switch port 'enabled' column is empty or -@@ -2336,15 +2421,13 @@ join_logical_ports(struct northd_context *ctx, +@@ -2336,15 +2423,13 @@ join_logical_ports(struct northd_context *ctx, for (size_t i = 0; i < od->nbs->n_ports; i++) { const struct nbrec_logical_switch_port *nbsp = od->nbs->ports[i]; @@ -5903,7 +10316,7 @@ index 5a3227568..c81e3220c 100644 ovn_port_set_nb(op, nbsp, NULL); ovs_list_remove(&op->list); -@@ -2435,16 +2518,15 @@ join_logical_ports(struct northd_context *ctx, +@@ -2435,16 +2520,15 @@ join_logical_ports(struct northd_context *ctx, continue; } @@ -5929,7 +10342,7 @@ index 5a3227568..c81e3220c 100644 ovn_port_set_nb(op, NULL, nbrp); ovs_list_remove(&op->list); ovs_list_push_back(both, &op->list); -@@ -2487,7 +2569,7 @@ join_logical_ports(struct northd_context *ctx, +@@ -2487,7 +2571,7 @@ join_logical_ports(struct northd_context *ctx, char 
*redirect_name = ovn_chassis_redirect_name(nbrp->name); struct ovn_port *crp = ovn_port_find(ports, redirect_name); @@ -5938,7 +10351,7 @@ index 5a3227568..c81e3220c 100644 crp->derived = true; ovn_port_set_nb(crp, NULL, nbrp); ovs_list_remove(&crp->list); -@@ -3179,6 +3261,12 @@ ovn_port_update_sbrec(struct northd_context *ctx, +@@ -3179,6 +3263,12 @@ ovn_port_update_sbrec(struct northd_context *ctx, } else { sbrec_port_binding_set_ha_chassis_group(op->sb, NULL); } @@ -5951,7 +10364,7 @@ index 5a3227568..c81e3220c 100644 } } else { const char *chassis = NULL; -@@ -3308,6 +3396,14 @@ ovn_port_update_sbrec(struct northd_context *ctx, +@@ -3308,6 +3398,14 @@ ovn_port_update_sbrec(struct northd_context *ctx, if (op->tunnel_key != op->sb->tunnel_key) { sbrec_port_binding_set_tunnel_key(op->sb, op->tunnel_key); } @@ -5966,7 +10379,7 @@ index 5a3227568..c81e3220c 100644 } /* Remove mac_binding entries that refer to logical_ports which are -@@ -3340,6 +3436,26 @@ cleanup_sb_ha_chassis_groups(struct northd_context *ctx, +@@ -3340,6 +3438,26 @@ cleanup_sb_ha_chassis_groups(struct northd_context *ctx, } } @@ -5993,7 +10406,7 @@ index 5a3227568..c81e3220c 100644 struct service_monitor_info { struct hmap_node hmap_node; const struct sbrec_service_monitor *sbrec_mon; -@@ -3436,12 +3552,12 @@ ovn_lb_svc_create(struct northd_context *ctx, struct ovn_northd_lb *lb, +@@ -3436,12 +3554,12 @@ ovn_lb_svc_create(struct northd_context *ctx, struct ovn_northd_lb *lb, } static @@ -6011,7 +10424,7 @@ index 5a3227568..c81e3220c 100644 if (lb_vip_nb->lb_health_check) { ds_put_cstr(action, "ct_lb(backends="); -@@ -3463,18 +3579,30 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip, +@@ -3463,18 +3581,30 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip, } if (!n_active_backends) { @@ -6046,7 +10459,7 @@ index 5a3227568..c81e3220c 100644 ds_chomp(action, ';'); ds_chomp(action, ')'); ds_put_format(action, "; hash_fields=\"%s\");", selection_fields); -@@ -3547,10 +3675,18 @@ 
build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths, +@@ -3547,10 +3677,18 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths, /* Create SB Load balancer records if not present and sync * the SB load balancer columns. */ HMAP_FOR_EACH (lb, hmap_node, lbs) { @@ -6065,7 +10478,7 @@ index 5a3227568..c81e3220c 100644 if (!lb->slb) { sbrec_lb = sbrec_load_balancer_insert(ctx->ovnsb_txn); lb->slb = sbrec_lb; -@@ -3564,9 +3700,11 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths, +@@ -3564,9 +3702,11 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths, sbrec_load_balancer_set_name(lb->slb, lb->nlb->name); sbrec_load_balancer_set_vips(lb->slb, &lb->nlb->vips); sbrec_load_balancer_set_protocol(lb->slb, lb->nlb->protocol); @@ -6077,16 +10490,58 @@ index 5a3227568..c81e3220c 100644 } /* Set the list of associated load balanacers to a logical switch -@@ -4822,7 +4960,7 @@ ovn_ls_port_group_destroy(struct hmap *nb_pgs) +@@ -4821,27 +4961,38 @@ ovn_ls_port_group_destroy(struct hmap *nb_pgs) + hmap_destroy(nb_pgs); } - static bool +-static bool -has_stateful_acl(struct ovn_datapath *od) -+ls_has_stateful_acl(struct ovn_datapath *od) ++static void ++ls_get_acl_flags(struct ovn_datapath *od) { - for (size_t i = 0; i < od->nbs->n_acls; i++) { - struct nbrec_acl *acl = od->nbs->acls[i]; -@@ -4905,50 +5043,82 @@ build_lswitch_input_port_sec_od( +- for (size_t i = 0; i < od->nbs->n_acls; i++) { +- struct nbrec_acl *acl = od->nbs->acls[i]; +- if (!strcmp(acl->action, "allow-related")) { +- return true; ++ od->has_acls = false; ++ od->has_stateful_acl = false; ++ ++ if (od->nbs->n_acls) { ++ od->has_acls = true; ++ ++ for (size_t i = 0; i < od->nbs->n_acls; i++) { ++ struct nbrec_acl *acl = od->nbs->acls[i]; ++ if (!strcmp(acl->action, "allow-related")) { ++ od->has_stateful_acl = true; ++ return; ++ } + } + } + + struct ovn_ls_port_group *ls_pg; + HMAP_FOR_EACH (ls_pg, key_node, &od->nb_pgs) { +- for (size_t i = 0; i < 
ls_pg->nb_pg->n_acls; i++) { +- struct nbrec_acl *acl = ls_pg->nb_pg->acls[i]; +- if (!strcmp(acl->action, "allow-related")) { +- return true; ++ if (ls_pg->nb_pg->n_acls) { ++ od->has_acls = true; ++ ++ for (size_t i = 0; i < ls_pg->nb_pg->n_acls; i++) { ++ struct nbrec_acl *acl = ls_pg->nb_pg->acls[i]; ++ if (!strcmp(acl->action, "allow-related")) { ++ od->has_stateful_acl = true; ++ return; ++ } + } + } + } +- +- return false; + } + + /* Logical switch ingress table 0: Ingress port security - L2 +@@ -4905,50 +5056,82 @@ build_lswitch_input_port_sec_od( } static void @@ -6196,7 +10651,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); } -@@ -4956,23 +5126,20 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths, +@@ -4956,23 +5139,20 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths, build_port_security_ip(P_OUT, op, lflows, &op->nbsp->header_); } } @@ -6229,7 +10684,7 @@ index 5a3227568..c81e3220c 100644 } static void -@@ -5008,8 +5175,6 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op, +@@ -5008,8 +5188,6 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op, static void build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) { @@ -6238,7 +10693,7 @@ index 5a3227568..c81e3220c 100644 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are * allowed by default. */ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;"); -@@ -5024,7 +5189,7 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) +@@ -5024,7 +5202,7 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) /* If there are any stateful ACL rules in this datapath, we must * send all IP packets through the conntrack action, which handles * defragmentation, in order to match L4 headers. 
*/ @@ -6247,7 +10702,7 @@ index 5a3227568..c81e3220c 100644 for (size_t i = 0; i < od->n_router_ports; i++) { skip_port_from_conntrack(od, od->router_ports[i], S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL, -@@ -5084,7 +5249,10 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, +@@ -5084,7 +5262,10 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, struct nbrec_load_balancer *lb, int pl, struct shash *meter_groups) { @@ -6259,7 +10714,7 @@ index 5a3227568..c81e3220c 100644 return; } -@@ -5124,7 +5292,7 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, +@@ -5124,7 +5305,7 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, } static bool @@ -6268,12 +10723,86 @@ index 5a3227568..c81e3220c 100644 { for (int i = 0; i < od->nbs->n_load_balancer; i++) { struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; -@@ -5267,6 +5435,13 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows) +@@ -5190,8 +5371,8 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, + vip_configured = (vip_configured || lb->n_vips); + } + +- /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send +- * packet to conntrack for defragmentation. ++ /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send ++ * packet to conntrack for defragmentation and possibly for unNATting. + * + * Send all the packets to conntrack in the ingress pipeline if the + * logical switch has a load balancer with VIP configured. 
Earlier +@@ -5221,9 +5402,9 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, + */ + if (vip_configured) { + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, +- 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); ++ 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, +- 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); ++ 100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;"); + } + } + +@@ -5235,10 +5416,46 @@ build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows) + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;"); + ++ const char *lb_protocols[] = {"tcp", "udp", "sctp"}; ++ struct ds actions = DS_EMPTY_INITIALIZER; ++ struct ds match = DS_EMPTY_INITIALIZER; ++ ++ for (size_t i = 0; i < ARRAY_SIZE(lb_protocols); i++) { ++ ds_clear(&match); ++ ds_clear(&actions); ++ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip4 && %s", ++ lb_protocols[i]); ++ ds_put_format(&actions, REG_ORIG_DIP_IPV4 " = ip4.dst; " ++ REG_ORIG_TP_DPORT " = %s.dst; ct_lb;", ++ lb_protocols[i]); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 120, ++ ds_cstr(&match), ds_cstr(&actions)); ++ ++ ds_clear(&match); ++ ds_clear(&actions); ++ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip6 && %s", ++ lb_protocols[i]); ++ ds_put_format(&actions, REG_ORIG_DIP_IPV6 " = ip6.dst; " ++ REG_ORIG_TP_DPORT " = %s.dst; ct_lb;", ++ lb_protocols[i]); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 120, ++ ds_cstr(&match), ds_cstr(&actions)); ++ } ++ ++ ds_destroy(&actions); ++ ds_destroy(&match); ++ ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 110, ++ REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); ++ ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 110, ++ REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); ++ + /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be + * sent to conntrack for tracking and defragmentation. 
*/ + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100, + REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); ++ + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100, + REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); + } +@@ -5267,6 +5484,17 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows) for (size_t i = 0; i < ARRAY_SIZE(stages); i++) { enum ovn_stage stage = stages[i]; + /* In any case, advance to the next stage. */ -+ ovn_lflow_add(lflows, od, stage, 0, "1", "next;"); ++ if (!od->has_acls && !od->has_lb_vip) { ++ ovn_lflow_add(lflows, od, stage, UINT16_MAX, "1", "next;"); ++ } else { ++ ovn_lflow_add(lflows, od, stage, 0, "1", "next;"); ++ } + + if (!od->has_stateful_acl && !od->has_lb_vip) { + continue; @@ -6282,7 +10811,7 @@ index 5a3227568..c81e3220c 100644 /* New, not already established connections, may hit either allow * or drop ACLs. For allow ACLs, the connection must also be committed * to conntrack so we set REGBIT_ACL_HINT_ALLOW_NEW. -@@ -5327,9 +5502,6 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows) +@@ -5327,9 +5555,6 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows) ovn_lflow_add(lflows, od, stage, 1, "ct.est && ct_label.blocked == 0", REGBIT_ACL_HINT_BLOCK " = 1; " "next;"); @@ -6290,27 +10819,190 @@ index 5a3227568..c81e3220c 100644 - /* In any case, advance to the next stage. */ - ovn_lflow_add(lflows, od, stage, 0, "1", "next;"); } - } + } + +@@ -5661,13 +5886,22 @@ static void + build_acls(struct ovn_datapath *od, struct hmap *lflows, + struct hmap *port_groups, const struct shash *meter_groups) + { +- bool has_stateful = (has_stateful_acl(od) || has_lb_vip(od)); ++ bool has_stateful = od->has_stateful_acl || od->has_lb_vip; + + /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by +- * default. A related rule at priority 1 is added below if there ++ * default. 
If the logical switch has no ACLs or no load balancers, ++ * then add 65535-priority flow to advance the packet to next ++ * stage. ++ * ++ * A related rule at priority 1 is added below if there + * are any stateful ACLs in this datapath. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;"); ++ if (!od->has_acls && !od->has_lb_vip) { ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "1", "next;"); ++ } else { ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;"); ++ } + + if (has_stateful) { + /* Ingress and Egress ACL Table (Priority 1). +@@ -5698,21 +5932,23 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, + "ip && (!ct.est || (ct.est && ct_label.blocked == 1))", + REGBIT_CONNTRACK_COMMIT" = 1; next;"); + +- /* Ingress and Egress ACL Table (Priority 65535). ++ /* Ingress and Egress ACL Table (Priority 65532). + * + * Always drop traffic that's in an invalid state. Also drop + * reply direction packets for connections that have been marked + * for deletion (bit 0 of ct_label is set). + * + * This is enforced at a higher priority than ACLs can be defined. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, +- "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)", +- "drop;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, +- "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)", +- "drop;"); ++ char *match = ++ xasprintf("%s(ct.est && ct.rpl && ct_label.blocked == 1)", ++ use_ct_inv_match ? "ct.inv || " : ""); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3, ++ match, "drop;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3, ++ match, "drop;"); ++ free(match); + +- /* Ingress and Egress ACL Table (Priority 65535). ++ /* Ingress and Egress ACL Table (Priority 65535 - 3). 
+ * + * Allow reply traffic that is part of an established + * conntrack entry that has not been marked for deletion +@@ -5721,14 +5957,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, + * direction to hit the currently defined policy from ACLs. + * + * This is enforced at a higher priority than ACLs can be defined. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, +- "ct.est && !ct.rel && !ct.new && !ct.inv " +- "&& ct.rpl && ct_label.blocked == 0", +- "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, +- "ct.est && !ct.rel && !ct.new && !ct.inv " +- "&& ct.rpl && ct_label.blocked == 0", +- "next;"); ++ match = xasprintf("ct.est && !ct.rel && !ct.new%s && " ++ "ct.rpl && ct_label.blocked == 0", ++ use_ct_inv_match ? " && !ct.inv" : ""); ++ ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3, ++ match, "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3, ++ match, "next;"); ++ free(match); + + /* Ingress and Egress ACL Table (Priority 65535). + * +@@ -5741,21 +5978,21 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, + * a dynamically negotiated FTP data channel), but will allow + * related traffic such as an ICMP Port Unreachable through + * that's generated from a non-listening UDP port. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, +- "!ct.est && ct.rel && !ct.new && !ct.inv " +- "&& ct_label.blocked == 0", +- "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, +- "!ct.est && ct.rel && !ct.new && !ct.inv " +- "&& ct_label.blocked == 0", +- "next;"); ++ match = xasprintf("!ct.est && ct.rel && !ct.new%s && " ++ "ct_label.blocked == 0", ++ use_ct_inv_match ? " && !ct.inv" : ""); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3, ++ match, "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3, ++ match, "next;"); ++ free(match); + +- /* Ingress and Egress ACL Table (Priority 65535). 
++ /* Ingress and Egress ACL Table (Priority 65532). + * + * Not to do conntrack on ND packets. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3, + "nd || nd_ra || nd_rs || mldv1 || mldv2", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3, + "nd || nd_ra || nd_rs || mldv1 || mldv2", "next;"); + } -@@ -5661,7 +5833,7 @@ static void - build_acls(struct ovn_datapath *od, struct hmap *lflows, - struct hmap *port_groups, const struct shash *meter_groups) - { -- bool has_stateful = (has_stateful_acl(od) || has_lb_vip(od)); -+ bool has_stateful = od->has_stateful_acl || od->has_lb_vip; +@@ -5842,15 +6079,18 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, + actions); + } - /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by - * default. A related rule at priority 1 is added below if there -@@ -5930,7 +6102,7 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows) - } +- /* Add a 34000 priority flow to advance the service monitor reply +- * packets to skip applying ingress ACLs. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 34000, +- "eth.dst == $svc_monitor_mac", "next;"); + +- /* Add a 34000 priority flow to advance the service monitor packets +- * generated by ovn-controller to skip applying egress ACLs. */ +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 34000, +- "eth.src == $svc_monitor_mac", "next;"); ++ if (od->has_acls || od->has_lb_vip) { ++ /* Add a 34000 priority flow to advance the service monitor reply ++ * packets to skip applying ingress ACLs. */ ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 34000, ++ "eth.dst == $svc_monitor_mac", "next;"); ++ ++ /* Add a 34000 priority flow to advance the service monitor packets ++ * generated by ovn-controller to skip applying egress ACLs. 
*/ ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 34000, ++ "eth.src == $svc_monitor_mac", "next;"); ++ } + } + + static void +@@ -5914,37 +6154,6 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) { } + } +-static void +-build_lb(struct ovn_datapath *od, struct hmap *lflows) +-{ +- /* Ingress and Egress LB Table (Priority 0): Packets are allowed by +- * default. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); +- +- if (od->nbs->n_load_balancer) { +- for (size_t i = 0; i < od->n_router_ports; i++) { +- skip_port_from_conntrack(od, od->router_ports[i], +- S_SWITCH_IN_LB, S_SWITCH_OUT_LB, +- UINT16_MAX, lflows); +- } +- } +- - if (has_lb_vip(od)) { -+ if (od->has_lb_vip) { - /* Ingress and Egress LB Table (Priority 65534). - * - * Send established traffic through conntrack for just NAT. */ -@@ -5953,11 +6125,20 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, +- /* Ingress and Egress LB Table (Priority 65534). +- * +- * Send established traffic through conntrack for just NAT. 
*/ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1, +- "ct.est && !ct.rel && !ct.new && !ct.inv && " +- "ct_label.natted == 1", +- REGBIT_CONNTRACK_NAT" = 1; next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1, +- "ct.est && !ct.rel && !ct.new && !ct.inv && " +- "ct_label.natted == 1", +- REGBIT_CONNTRACK_NAT" = 1; next;"); +- } +-} +- + static void + build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, + struct ovn_northd_lb *lb) +@@ -5953,11 +6162,20 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb_vip *lb_vip = &lb->vips[i]; struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i]; @@ -6331,7 +11023,7 @@ index 5a3227568..c81e3220c 100644 } const char *proto = NULL; -@@ -5970,12 +6151,17 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, +@@ -5970,12 +6188,17 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, proto = "sctp"; } } @@ -6352,49 +11044,26 @@ index 5a3227568..c81e3220c 100644 struct ds match = DS_EMPTY_INITIALIZER; ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, -@@ -6021,9 +6207,39 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs) - * REGBIT_CONNTRACK_COMMIT is set for new connections and - * REGBIT_CONNTRACK_NAT is set for established connections. So they - * don't overlap. -+ * -+ * In the ingress pipeline, also store the original destination IP and -+ * transport port to be used when detecting hairpin packets. - */ +@@ -6015,18 +6238,6 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs) + REGBIT_CONNTRACK_COMMIT" == 1", + "ct_commit { ct_label.blocked = 0; }; next;"); + +- /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent +- * through nat (without committing). +- * +- * REGBIT_CONNTRACK_COMMIT is set for new connections and +- * REGBIT_CONNTRACK_NAT is set for established connections. So they +- * don't overlap. 
+- */ - ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, - REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); -+ const char *lb_protocols[] = {"tcp", "udp", "sctp"}; -+ struct ds actions = DS_EMPTY_INITIALIZER; -+ struct ds match = DS_EMPTY_INITIALIZER; -+ -+ for (size_t i = 0; i < ARRAY_SIZE(lb_protocols); i++) { -+ ds_clear(&match); -+ ds_clear(&actions); -+ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip4 && %s", -+ lb_protocols[i]); -+ ds_put_format(&actions, REG_ORIG_DIP_IPV4 " = ip4.dst; " -+ REG_ORIG_TP_DPORT " = %s.dst; ct_lb;", -+ lb_protocols[i]); -+ ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, -+ ds_cstr(&match), ds_cstr(&actions)); -+ -+ ds_clear(&match); -+ ds_clear(&actions); -+ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip6 && %s", -+ lb_protocols[i]); -+ ds_put_format(&actions, REG_ORIG_DIP_IPV6 " = ip6.dst; " -+ REG_ORIG_TP_DPORT " = %s.dst; ct_lb;", -+ lb_protocols[i]); -+ ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, -+ ds_cstr(&match), ds_cstr(&actions)); -+ } -+ -+ ds_destroy(&actions); -+ ds_destroy(&match); -+ - ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, - REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); - -@@ -6051,40 +6267,50 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows) +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, +- REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); +- + /* Load balancing rules for new connections get committed to conntrack + * table. 
So even if REGBIT_CONNTRACK_COMMIT is set in a previous table + * a higher priority rule for load balancing below also commits the +@@ -6051,40 +6262,50 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows) ovn_lflow_add(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 0, "1", "next;"); ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 0, "1", "next;"); @@ -6465,7 +11134,7 @@ index 5a3227568..c81e3220c 100644 } } -@@ -6754,9 +6980,7 @@ is_vlan_transparent(const struct ovn_datapath *od) +@@ -6754,9 +6975,7 @@ is_vlan_transparent(const struct ovn_datapath *od) } static void @@ -6476,7 +11145,7 @@ index 5a3227568..c81e3220c 100644 { /* This flow table structure is documented in ovn-northd(8), so please * update ovn-northd.8.xml if you change anything. */ -@@ -6765,32 +6989,111 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -6765,49 +6984,127 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, struct ds actions = DS_EMPTY_INITIALIZER; struct ovn_datapath *od; @@ -6486,12 +11155,19 @@ index 5a3227568..c81e3220c 100644 - struct ovn_port *op; - HMAP_FOR_EACH (op, key_node, ports) { - if (!op->nbsp) { -+ /* Ingress table 24: Destination lookup for unknown MACs (priority 0). */ ++ /* Ingress table 23: Destination lookup for unknown MACs (priority 0). 
*/ + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbs) { continue; } +- if ((!strcmp(op->nbsp->type, "localnet")) || +- (!strcmp(op->nbsp->type, "vtep"))) { +- ds_clear(&match); +- ds_put_format(&match, "inport == %s", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, +- 100, ds_cstr(&match), "next;", +- &op->nbsp->header_); + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", + "outport = get_fdb(eth.dst); next;"); + @@ -6502,15 +11178,38 @@ index 5a3227568..c81e3220c 100644 + } else { + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 50, + "outport == \"none\"", "drop;"); -+ } + } + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 0, "1", + "output;"); -+ } -+ + } + +- /* Ingress table 13: ARP/ND responder, reply for known IPs. +- * (priority 50). */ +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp) { +- continue; +- } + ds_destroy(&match); + ds_destroy(&actions); +} -+ + +- if (!strcmp(op->nbsp->type, "virtual")) { +- /* Handle +- * - GARPs for virtual ip which belongs to a logical port +- * of type 'virtual' and bind that port. +- * +- * - ARP reply from the virtual ip which belongs to a logical +- * port of type 'virtual' and bind that port. +- * */ +- ovs_be32 ip; +- const char *virtual_ip = smap_get(&op->nbsp->options, +- "virtual-ip"); +- const char *virtual_parents = smap_get(&op->nbsp->options, +- "virtual-parents"); +- if (!virtual_ip || !virtual_parents || +- !ip_parse(virtual_ip, &ip)) { +- continue; +- } +/* Build pre-ACL and ACL tables for both ingress and egress. + * Ingress tables 3 through 10. Egress tables 0 through 7. 
*/ +static void @@ -6521,8 +11220,8 @@ index 5a3227568..c81e3220c 100644 + struct hmap *lbs) +{ + if (od->nbs) { -+ od->has_stateful_acl = ls_has_stateful_acl(od); + od->has_lb_vip = ls_has_lb_vip(od); ++ ls_get_acl_flags(od); + + build_pre_acls(od, lflows); + build_pre_lb(od, lflows, meter_groups, lbs); @@ -6530,7 +11229,6 @@ index 5a3227568..c81e3220c 100644 + build_acl_hints(od, lflows); + build_acls(od, lflows, port_groups, meter_groups); + build_qos(od, lflows); -+ build_lb(od, lflows); + build_stateful(od, lflows, lbs); + build_lb_hairpin(od, lflows); + } @@ -6570,27 +11268,17 @@ index 5a3227568..c81e3220c 100644 + struct ds *match) +{ + if (op->nbsp) { - if ((!strcmp(op->nbsp->type, "localnet")) || - (!strcmp(op->nbsp->type, "vtep"))) { -- ds_clear(&match); -- ds_put_format(&match, "inport == %s", op->json_key); ++ if ((!strcmp(op->nbsp->type, "localnet")) || ++ (!strcmp(op->nbsp->type, "vtep"))) { + ds_clear(match); + ds_put_format(match, "inport == %s", op->json_key); - ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, -- 100, ds_cstr(&match), "next;", ++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, + 100, ds_cstr(match), "next;", - &op->nbsp->header_); - } - } ++ &op->nbsp->header_); ++ } ++ } +} - -- /* Ingress table 13: ARP/ND responder, reply for known IPs. -- * (priority 50). */ -- HMAP_FOR_EACH (op, key_node, ports) { -- if (!op->nbsp) { -- continue; -- } -- ++ +/* Ingress table 13: ARP/ND responder, reply for known IPs. + * (priority 50). 
*/ +static void @@ -6601,19 +11289,27 @@ index 5a3227568..c81e3220c 100644 + struct ds *match) +{ + if (op->nbsp) { - if (!strcmp(op->nbsp->type, "virtual")) { - /* Handle - * - GARPs for virtual ip which belongs to a logical port -@@ -6806,7 +7109,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, - "virtual-parents"); - if (!virtual_ip || !virtual_parents || - !ip_parse(virtual_ip, &ip)) { -- continue; ++ if (!strcmp(op->nbsp->type, "virtual")) { ++ /* Handle ++ * - GARPs for virtual ip which belongs to a logical port ++ * of type 'virtual' and bind that port. ++ * ++ * - ARP reply from the virtual ip which belongs to a logical ++ * port of type 'virtual' and bind that port. ++ * */ ++ ovs_be32 ip; ++ const char *virtual_ip = smap_get(&op->nbsp->options, ++ "virtual-ip"); ++ const char *virtual_parents = smap_get(&op->nbsp->options, ++ "virtual-parents"); ++ if (!virtual_ip || !virtual_parents || ++ !ip_parse(virtual_ip, &ip)) { + return; - } ++ } char *tokstr = xstrdup(virtual_parents); -@@ -6821,21 +7124,21 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + char *save_ptr = NULL; +@@ -6821,21 +7118,21 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, continue; } @@ -6640,7 +11336,7 @@ index 5a3227568..c81e3220c 100644 &vp->nbsp->header_); } -@@ -6850,20 +7153,20 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -6850,20 +7147,20 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, if (check_lsp_is_up && !lsp_is_up(op->nbsp) && !lsp_is_router(op->nbsp) && strcmp(op->nbsp->type, "localport")) { @@ -6667,7 +11363,7 @@ index 5a3227568..c81e3220c 100644 "eth.dst = eth.src; " "eth.src = %s; " "arp.op = 2; /* ARP reply */ " -@@ -6878,8 +7181,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -6878,8 +7175,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, op->lsp_addrs[i].ipv4_addrs[j].addr_s); ovn_lflow_add_with_hint(lflows, op->od, 
S_SWITCH_IN_ARP_ND_RSP, 50, @@ -6678,7 +11374,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); /* Do not reply to an ARP request from the port that owns -@@ -6894,10 +7197,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -6894,10 +7191,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, * address is intended to detect situations where the * network is not working as configured, so dropping the * request would frustrate that intent.) */ @@ -6691,7 +11387,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); } -@@ -6905,15 +7208,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -6905,15 +7202,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, * unicast IPv6 address and its all-nodes multicast address, * but always respond with the unicast IPv6 address. */ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { @@ -6711,7 +11407,7 @@ index 5a3227568..c81e3220c 100644 "%s { " "eth.src = %s; " "ip6.src = %s; " -@@ -6930,93 +7233,99 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -6930,93 +7227,99 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, op->lsp_addrs[i].ea_s); ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, @@ -6868,7 +11564,7 @@ index 5a3227568..c81e3220c 100644 } bool is_external = lsp_is_external(op->nbsp); -@@ -7024,7 +7333,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7024,7 +7327,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, !op->nbsp->ha_chassis_group)) { /* If it's an external port and there are no localnet ports * and if it doesn't belong to an HA chassis group ignore it. 
*/ @@ -6877,7 +11573,7 @@ index 5a3227568..c81e3220c 100644 } for (size_t i = 0; i < op->n_lsp_addrs; i++) { -@@ -7047,14 +7356,35 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7047,14 +7350,35 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } } } @@ -6920,7 +11616,7 @@ index 5a3227568..c81e3220c 100644 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100, "udp.dst == 53", -@@ -7071,47 +7401,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7071,47 +7395,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, dns_match, dns_action); } @@ -6987,7 +11683,7 @@ index 5a3227568..c81e3220c 100644 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 110, "eth.dst == $svc_monitor_mac", -@@ -7120,22 +7436,22 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7120,22 +7430,22 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; if (mcast_sw_info->enabled) { @@ -7015,7 +11711,7 @@ index 5a3227568..c81e3220c 100644 /* Flood all IP multicast traffic destined to 224.0.0.X to all * ports - RFC 4541, section 2.1.2, item 2. -@@ -7157,10 +7473,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7157,10 +7467,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, * handled by the L2 multicast flow. 
*/ if (!mcast_sw_info->flood_unregistered) { @@ -7028,7 +11724,7 @@ index 5a3227568..c81e3220c 100644 "clone { " "outport = \""MC_MROUTER_FLOOD"\"; " "output; " -@@ -7168,7 +7484,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7168,7 +7478,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } if (mcast_sw_info->flood_static) { @@ -7037,7 +11733,7 @@ index 5a3227568..c81e3220c 100644 } /* Explicitly drop the traffic if relay or static flooding -@@ -7176,30 +7492,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7176,30 +7486,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, */ if (!mcast_sw_info->flood_relay && !mcast_sw_info->flood_static) { @@ -7082,7 +11778,7 @@ index 5a3227568..c81e3220c 100644 struct mcast_switch_info *mcast_sw_info = &igmp_group->datapath->mcast_info.sw; -@@ -7211,57 +7530,62 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7211,57 +7524,62 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ovs_be32 group_address = in6_addr_get_mapped_ipv4(&igmp_group->address); if (ip_is_local_multicast(group_address)) { @@ -7160,7 +11856,7 @@ index 5a3227568..c81e3220c 100644 /* For ports connected to logical routers add flows to bypass the * broadcast flooding of ARP/ND requests in table 19. 
We direct the -@@ -7279,15 +7603,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7279,15 +7597,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, struct eth_addr mac; if (ovs_scan(op->nbsp->addresses[i], ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { @@ -7182,7 +11878,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); } else if (!strcmp(op->nbsp->addresses[i], "unknown")) { if (lsp_is_enabled(op->nbsp)) { -@@ -7300,15 +7624,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7300,15 +7618,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { continue; } @@ -7204,7 +11900,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); } else if (!strcmp(op->nbsp->addresses[i], "router")) { if (!op->peer || !op->peer->nbrp -@@ -7316,8 +7640,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7316,8 +7634,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { continue; } @@ -7215,7 +11911,7 @@ index 5a3227568..c81e3220c 100644 ETH_ADDR_ARGS(mac)); if (op->peer->od->l3dgw_port && op->peer->od->l3redirect_port -@@ -7343,16 +7667,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7343,16 +7661,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } if (add_chassis_resident_check) { @@ -7236,7 +11932,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); /* Add ethernet addresses specified in NAT rules on -@@ -7366,19 +7690,19 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +@@ -7366,19 +7684,19 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, && nat->logical_port && nat->external_mac && eth_addr_from_string(nat->external_mac, &mac)) { @@ -7262,7 +11958,7 @@ index 5a3227568..c81e3220c 100644 &op->nbsp->header_); } } -@@ -7392,71 +7716,202 @@ build_lswitch_flows(struct hmap 
*datapaths, struct hmap *ports, +@@ -7392,71 +7710,202 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } } } @@ -7271,14 +11967,24 @@ index 5a3227568..c81e3220c 100644 - /* Ingress table 19: Destination lookup for unknown MACs (priority 0). */ - HMAP_FOR_EACH (od, key_node, datapaths) { - if (!od->nbs) { +- continue; +- } +struct bfd_entry { + struct hmap_node hmap_node; -+ + +- if (od->has_unknown) { +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", +- "outport = \""MC_UNKNOWN"\"; output;"); +- } +- } + const struct sbrec_bfd *sb_bt; -+ + +- build_lswitch_output_port_sec(ports, datapaths, lflows); + bool ref; +}; -+ + +- ds_destroy(&match); +- ds_destroy(&actions); +static struct bfd_entry * +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, + const char *dst_ip) @@ -7295,71 +12001,81 @@ index 5a3227568..c81e3220c 100644 + } + } + return NULL; -+} -+ -+static void + } + +-/* Build pre-ACL and ACL tables for both ingress and egress. +- * Ingress tables 3 through 10. Egress tables 0 through 7. 
*/ + static void +-build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od, +- struct hmap *port_groups, +- struct hmap *lflows, +- struct shash *meter_groups, +- struct hmap *lbs) +bfd_cleanup_connections(struct northd_context *ctx, struct hmap *bfd_map) -+{ + { +- if (od->nbs) { +- build_pre_acls(od, lflows); +- build_pre_lb(od, lflows, meter_groups, lbs); +- build_pre_stateful(od, lflows); +- build_acl_hints(od, lflows); +- build_acls(od, lflows, port_groups, meter_groups); +- build_qos(od, lflows); +- build_lb(od, lflows); +- build_stateful(od, lflows, lbs); +- build_lb_hairpin(od, lflows); + const struct nbrec_bfd *nb_bt; + struct bfd_entry *bfd_e; + + NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { + bfd_e = bfd_port_lookup(bfd_map, nb_bt->logical_port, nb_bt->dst_ip); + if (!bfd_e) { - continue; - } - -- if (od->has_unknown) { -- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", -- "outport = \""MC_UNKNOWN"\"; output;"); ++ continue; ++ } ++ + if (!bfd_e->ref && strcmp(nb_bt->status, "admin_down")) { + /* no user for this bfd connection */ + nbrec_bfd_set_status(nb_bt, "admin_down"); - } - } - -- build_lswitch_output_port_sec(ports, datapaths, lflows); -- -- ds_destroy(&match); -- ds_destroy(&actions); ++ } ++ } ++ + HMAP_FOR_EACH_POP (bfd_e, hmap_node, bfd_map) { + free(bfd_e); -+ } + } } --/* Build pre-ACL and ACL tables for both ingress and egress. -- * Ingress tables 3 through 10. Egress tables 0 through 7. */ +-/* Logical switch ingress table 0: Admission control framework (priority +- * 100). 
*/ +#define BFD_DEF_MINTX 1000 /* 1s */ +#define BFD_DEF_MINRX 1000 /* 1s */ +#define BFD_DEF_DETECT_MULT 5 + static void --build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od, -- struct hmap *port_groups, -- struct hmap *lflows, -- struct shash *meter_groups, -- struct hmap *lbs) +-build_lswitch_lflows_admission_control(struct ovn_datapath *od, +- struct hmap *lflows) +build_bfd_update_sb_conf(const struct nbrec_bfd *nb_bt, + const struct sbrec_bfd *sb_bt) { -- if (od->nbs) { -- build_pre_acls(od, lflows); -- build_pre_lb(od, lflows, meter_groups, lbs); -- build_pre_stateful(od, lflows); -- build_acl_hints(od, lflows); -- build_acls(od, lflows, port_groups, meter_groups); -- build_qos(od, lflows); -- build_lb(od, lflows); -- build_stateful(od, lflows, lbs); -- build_lb_hairpin(od, lflows); +- if (od->nbs) { +- /* Logical VLANs not supported. */ +- if (!is_vlan_transparent(od)) { +- /* Block logical VLANs. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, +- "vlan.present", "drop;"); +- } + if (strcmp(nb_bt->dst_ip, sb_bt->dst_ip)) { + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); + } -+ + +- /* Broadcast/multicast source address is invalid. */ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]", +- "drop;"); + if (strcmp(nb_bt->logical_port, sb_bt->logical_port)) { + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); + } -+ + +- /* Port security flows have priority 50 +- * (see build_lswitch_input_port_sec()) and will continue +- * to the next table if packet source is acceptable. */ + if (strcmp(nb_bt->status, sb_bt->status)) { + sbrec_bfd_set_status(sb_bt, nb_bt->status); + } @@ -7378,11 +12094,9 @@ index 5a3227568..c81e3220c 100644 + int min_rx = nb_bt->n_min_rx ? nb_bt->min_rx[0] : BFD_DEF_MINRX; + if (min_rx != sb_bt->min_rx) { + sbrec_bfd_set_min_rx(sb_bt, min_rx); - } - } - --/* Logical switch ingress table 0: Admission control framework (priority -- * 100). 
*/ ++ } ++} ++ +/* RFC 5881 section 4 + * The source port MUST be in the range 49152 through 65535. + * The same UDP source port number MUST be used for all BFD @@ -7400,24 +12114,16 @@ index 5a3227568..c81e3220c 100644 + port = bitmap_scan(bfd_src_ports, 0, 0, BFD_UDP_SRC_PORT_LEN); + if (port == BFD_UDP_SRC_PORT_LEN) { + return -ENOSPC; -+ } + } + bitmap_set1(bfd_src_ports, port); + + return port + BFD_UDP_SRC_PORT_START; -+} -+ - static void --build_lswitch_lflows_admission_control(struct ovn_datapath *od, -- struct hmap *lflows) + } + ++static void +build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections, + struct hmap *ports) - { -- if (od->nbs) { -- /* Logical VLANs not supported. */ -- if (!is_vlan_transparent(od)) { -- /* Block logical VLANs. */ -- ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, -- "vlan.present", "drop;"); ++{ + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); + const struct sbrec_bfd *sb_bt; + unsigned long *bfd_src_ports; @@ -7440,21 +12146,15 @@ index 5a3227568..c81e3220c 100644 + if (!nb_bt->status) { + /* default state is admin_down */ + nbrec_bfd_set_status(nb_bt, "admin_down"); - } - -- /* Broadcast/multicast source address is invalid. */ -- ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]", -- "drop;"); ++ } ++ + bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); + if (!bfd_e) { + int udp_src = bfd_get_unused_port(bfd_src_ports); + if (udp_src < 0) { + continue; + } - -- /* Port security flows have priority 50 -- * (see build_lswitch_input_port_sec()) and will continue -- * to the next table if packet source is acceptable. 
*/ ++ + sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); + sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); + sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); @@ -7491,9 +12191,8 @@ index 5a3227568..c81e3220c 100644 + if (op) { + op->has_bfd = true; + } - } --} - ++ } ++ + HMAP_FOR_EACH_POP (bfd_e, hmap_node, &sb_only) { + struct ovn_port *op = ovn_port_find(ports, bfd_e->sb_bt->logical_port); + if (op) { @@ -7509,7 +12208,7 @@ index 5a3227568..c81e3220c 100644 /* Returns a string of the IP address of the router port 'op' that * overlaps with 'ip_s". If one is not found, returns NULL. -@@ -7549,33 +8004,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, +@@ -7549,33 +7998,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, struct ds actions = DS_EMPTY_INITIALIZER; if (!strcmp(rule->action, "reroute")) { @@ -7554,7 +12253,7 @@ index 5a3227568..c81e3220c 100644 is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, lrp_addr_s, out_port->lrp_networks.ea_s, -@@ -7588,7 +8049,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, +@@ -7588,7 +8043,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, if (pkt_mark) { ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); } @@ -7563,19 +12262,10 @@ index 5a3227568..c81e3220c 100644 } ds_put_format(&match, "%s", rule->match); -@@ -7598,15 +8059,116 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, +@@ -7598,6 +8053,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, ds_destroy(&actions); } --struct parsed_route { -- struct ovs_list list_node; -- struct in6_addr prefix; -- unsigned int plen; -- bool is_src_route; -- uint32_t hash; -- const struct nbrec_logical_router_static_route *route; -- bool ecmp_symmetric_reply; --}; +static void +build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od, + struct hmap *ports, @@ -7677,19 +12367,10 @@ index 5a3227568..c81e3220c 100644 + 
ds_destroy(&actions); +} + -+struct parsed_route { -+ struct ovs_list list_node; -+ struct in6_addr prefix; -+ unsigned int plen; -+ bool is_src_route; -+ uint32_t hash; -+ const struct nbrec_logical_router_static_route *route; -+ bool ecmp_symmetric_reply; -+}; - - static uint32_t - route_hash(struct parsed_route *route) -@@ -7619,7 +8181,8 @@ route_hash(struct parsed_route *route) + struct parsed_route { + struct ovs_list list_node; + struct in6_addr prefix; +@@ -7619,7 +8175,8 @@ route_hash(struct parsed_route *route) * Otherwise return NULL. */ static struct parsed_route * parsed_routes_add(struct ovs_list *routes, @@ -7699,7 +12380,7 @@ index 5a3227568..c81e3220c 100644 { /* Verify that the next hop is an IP address with an all-ones mask. */ struct in6_addr nexthop; -@@ -7660,6 +8223,25 @@ parsed_routes_add(struct ovs_list *routes, +@@ -7660,6 +8217,25 @@ parsed_routes_add(struct ovs_list *routes, return NULL; } @@ -7725,7 +12406,7 @@ index 5a3227568..c81e3220c 100644 struct parsed_route *pr = xzalloc(sizeof *pr); pr->prefix = prefix; pr->plen = plen; -@@ -8102,16 +8684,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op, +@@ -8102,16 +8678,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op, build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4, &match, &priority); @@ -7747,7 +12428,7 @@ index 5a3227568..c81e3220c 100644 "%s = %s; " "eth.src = %s; " "outport = %s; " -@@ -8121,11 +8702,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op, +@@ -8121,11 +8696,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op, lrp_addr_s, op->lrp_networks.ea_s, op->json_key); @@ -7768,7 +12449,7 @@ index 5a3227568..c81e3220c 100644 ds_destroy(&actions); } -@@ -8203,15 +8793,10 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type, +@@ -8203,25 +8787,26 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type, return false; } @@ -7785,43 +12466,195 @@ index 5a3227568..c81e3220c 100644 return false; } 
-@@ -8221,7 +8806,7 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type, + return true; + } + ++enum lb_snat_type { ++ NO_FORCE_SNAT, ++ FORCE_SNAT, ++ SKIP_SNAT, ++}; ++ static void add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, struct ds *match, struct ds *actions, int priority, - bool lb_force_snat_ip, struct ovn_lb_vip *lb_vip, -+ bool force_snat_for_lb, struct ovn_lb_vip *lb_vip, ++ enum lb_snat_type snat_type, struct ovn_lb_vip *lb_vip, const char *proto, struct nbrec_load_balancer *lb, struct shash *meter_groups, struct sset *nat_entries) { -@@ -8230,7 +8815,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, +@@ -8230,9 +8815,10 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, /* A match and actions for new connections. */ char *new_match = xasprintf("ct.new && %s", ds_cstr(match)); - if (lb_force_snat_ip) { -+ if (force_snat_for_lb) { - char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s", - ds_cstr(actions)); +- char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s", +- ds_cstr(actions)); ++ if (snat_type == FORCE_SNAT || snat_type == SKIP_SNAT) { ++ char *new_actions = xasprintf("flags.%s_snat_for_lb = 1; %s", ++ snat_type == SKIP_SNAT ? "skip" : "force", ++ ds_cstr(actions)); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority, -@@ -8243,7 +8828,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, + new_match, new_actions, &lb->header_); + free(new_actions); +@@ -8243,11 +8829,12 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, /* A match and actions for established connections. */ char *est_match = xasprintf("ct.est && %s", ds_cstr(match)); - if (lb_force_snat_ip) { -+ if (force_snat_for_lb) { ++ if (snat_type == FORCE_SNAT || snat_type == SKIP_SNAT) { ++ char *est_actions = xasprintf("flags.%s_snat_for_lb = 1; ct_dnat;", ++ snat_type == SKIP_SNAT ? 
"skip" : "force"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority, +- est_match, +- "flags.force_snat_for_lb = 1; ct_dnat;", +- &lb->header_); ++ est_match, est_actions, &lb->header_); ++ free(est_actions); + } else { ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority, - est_match, - "flags.force_snat_for_lb = 1; ct_dnat;", -@@ -8320,7 +8905,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, + est_match, "ct_dnat;", &lb->header_); +@@ -8320,11 +8907,13 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, ds_put_format(&undnat_match, ") && outport == %s && " "is_chassis_resident(%s)", od->l3dgw_port->json_key, od->l3redirect_port->json_key); - if (lb_force_snat_ip) { -+ if (force_snat_for_lb) { ++ if (snat_type == FORCE_SNAT || snat_type == SKIP_SNAT) { ++ char *action = xasprintf("flags.%s_snat_for_lb = 1; ct_dnat;", ++ snat_type == SKIP_SNAT ? "skip" : "force"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 120, +- ds_cstr(&undnat_match), +- "flags.force_snat_for_lb = 1; ct_dnat;", ++ ds_cstr(&undnat_match), action, + &lb->header_); ++ free(action); + } else { ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 120, - ds_cstr(&undnat_match), - "flags.force_snat_for_lb = 1; ct_dnat;", -@@ -8788,2375 +9373,2531 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, + ds_cstr(&undnat_match), "ct_dnat;", +@@ -8334,6 +8923,105 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, + ds_destroy(&undnat_match); + } + ++static void ++build_lrouter_lb_flows(struct hmap *lflows, struct ovn_datapath *od, ++ struct hmap *lbs, struct shash *meter_groups, ++ struct sset *nat_entries, struct ds *match, ++ struct ds *actions) ++{ ++ /* A set to hold all ips that need defragmentation and tracking. 
*/ ++ struct sset all_ips = SSET_INITIALIZER(&all_ips); ++ bool lb_force_snat_ip = ++ !lport_addresses_is_empty(&od->lb_force_snat_addrs); ++ ++ for (int i = 0; i < od->nbr->n_load_balancer; i++) { ++ struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; ++ struct ovn_northd_lb *lb = ++ ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); ++ ovs_assert(lb); ++ ++ bool lb_skip_snat = smap_get_bool(&nb_lb->options, "skip_snat", false); ++ if (lb_skip_snat) { ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, ++ "flags.skip_snat_for_lb == 1 && ip", "next;"); ++ } ++ ++ for (size_t j = 0; j < lb->n_vips; j++) { ++ struct ovn_lb_vip *lb_vip = &lb->vips[j]; ++ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; ++ ds_clear(actions); ++ build_lb_vip_actions(lb_vip, lb_vip_nb, actions, ++ lb->selection_fields, false); ++ ++ if (!sset_contains(&all_ips, lb_vip->vip_str)) { ++ sset_add(&all_ips, lb_vip->vip_str); ++ /* If there are any load balancing rules, we should send ++ * the packet to conntrack for defragmentation and ++ * tracking. This helps with two things. ++ * ++ * 1. With tracking, we can send only new connections to ++ * pick a DNAT ip address from a group. ++ * 2. If there are L4 ports in load balancing rules, we ++ * need the defragmentation to match on L4 ports. */ ++ ds_clear(match); ++ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { ++ ds_put_format(match, "ip && ip4.dst == %s", ++ lb_vip->vip_str); ++ } else { ++ ds_put_format(match, "ip && ip6.dst == %s", ++ lb_vip->vip_str); ++ } ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, ++ 100, ds_cstr(match), "ct_next;", ++ &nb_lb->header_); ++ } ++ ++ /* Higher priority rules are added for load-balancing in DNAT ++ * table. For every match (on a VIP[:port]), we add two flows ++ * via add_router_lb_flow(). One flow is for specific matching ++ * on ct.new with an action of "ct_lb($targets);". The other ++ * flow is for ct.est with an action of "ct_dnat;". 
*/ ++ ds_clear(match); ++ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { ++ ds_put_format(match, "ip && ip4.dst == %s", ++ lb_vip->vip_str); ++ } else { ++ ds_put_format(match, "ip && ip6.dst == %s", ++ lb_vip->vip_str); ++ } ++ ++ int prio = 110; ++ bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); ++ bool is_sctp = nullable_string_is_equal(nb_lb->protocol, ++ "sctp"); ++ const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp"; ++ ++ if (lb_vip->vip_port) { ++ ds_put_format(match, " && %s && %s.dst == %d", proto, ++ proto, lb_vip->vip_port); ++ prio = 120; ++ } ++ ++ if (od->l3redirect_port && ++ (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ ++ enum lb_snat_type snat_type = NO_FORCE_SNAT; ++ if (lb_skip_snat) { ++ snat_type = SKIP_SNAT; ++ } else if (lb_force_snat_ip || od->lb_force_snat_router_ip) { ++ snat_type = FORCE_SNAT; ++ } ++ add_router_lb_flow(lflows, od, match, actions, prio, ++ snat_type, lb_vip, proto, nb_lb, ++ meter_groups, nat_entries); ++ } ++ } ++ sset_destroy(&all_ips); ++} ++ + #define ND_RA_MAX_INTERVAL_MAX 1800 + #define ND_RA_MAX_INTERVAL_MIN 4 + +@@ -8538,14 +9226,12 @@ build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op, + "arp.op = 2; /* ARP reply */ " + "arp.tha = arp.sha; " + "arp.sha = %s; " +- "arp.tpa = arp.spa; " +- "arp.spa = %s; " ++ "arp.tpa <-> arp.spa; " + "outport = inport; " + "flags.loopback = 1; " + "output;", + eth_addr, +- eth_addr, +- ip_address); ++ eth_addr); + } + + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority, +@@ -8788,2375 +9474,2514 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, } static void @@ -8390,7 +13223,17 @@ index 5a3227568..c81e3220c 100644 - */ - build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, - lflows); -- ++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router ++ * Adv (RA) 
options and response. */ ++static void ++build_ND_RA_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (!op->nbrp || op->nbrp->peer || !op->peer) { ++ return; ++ } + - /* ARP / ND handling for external IP addresses. - * - * DNAT and SNAT IP addresses are external IP addresses that need ARP @@ -8403,38 +13246,19 @@ index 5a3227568..c81e3220c 100644 - if (op != op->od->l3dgw_port) { - continue; - } -+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router -+ * Adv (RA) options and response. */ -+static void -+build_ND_RA_flows_for_lrouter_port( -+ struct ovn_port *op, struct hmap *lflows, -+ struct ds *match, struct ds *actions) -+{ -+ if (!op->nbrp || op->nbrp->peer || !op->peer) { ++ if (!op->lrp_networks.n_ipv6_addrs) { + return; + } - for (size_t i = 0; i < op->od->nbr->n_nat; i++) { - struct ovn_nat *nat_entry = &op->od->nat_entries[i]; -+ if (!op->lrp_networks.n_ipv6_addrs) { -+ return; -+ } - -- /* Skip entries we failed to parse. */ -- if (!nat_entry_is_valid(nat_entry)) { -- continue; -- } + struct smap options; + smap_clone(&options, &op->sb->options); -- /* Skip SNAT entries for now, we handle unique SNAT IPs separately -- * below. -- */ -- if (!strcmp(nat_entry->nb->type, "snat")) { +- /* Skip entries we failed to parse. */ +- if (!nat_entry_is_valid(nat_entry)) { - continue; - } -- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); -- } + /* enable IPv6 prefix delegation */ + bool prefix_delegation = smap_get_bool(&op->nbrp->options, + "prefix_delegation", false); @@ -8444,10 +13268,14 @@ index 5a3227568..c81e3220c 100644 + smap_add(&options, "ipv6_prefix_delegation", + prefix_delegation ? "true" : "false"); -- /* Now handle SNAT entries too, one per unique SNAT IP. 
*/ -- struct shash_node *snat_snode; -- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { -- struct ovn_snat_ip *snat_ip = snat_snode->data; +- /* Skip SNAT entries for now, we handle unique SNAT IPs separately +- * below. +- */ +- if (!strcmp(nat_entry->nb->type, "snat")) { +- continue; +- } +- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); +- } + bool ipv6_prefix = smap_get_bool(&op->nbrp->options, + "prefix", false); + if (!lrport_is_enabled(op->nbrp)) { @@ -8457,19 +13285,23 @@ index 5a3227568..c81e3220c 100644 + ipv6_prefix ? "true" : "false"); + sbrec_port_binding_set_options(op->sb, &options); +- /* Now handle SNAT entries too, one per unique SNAT IP. */ +- struct shash_node *snat_snode; +- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { +- struct ovn_snat_ip *snat_ip = snat_snode->data; ++ smap_destroy(&options); + - if (ovs_list_is_empty(&snat_ip->snat_entries)) { - continue; - } -+ smap_destroy(&options); ++ const char *address_mode = smap_get( ++ &op->nbrp->ipv6_ra_configs, "address_mode"); - struct ovn_nat *nat_entry = - CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), - struct ovn_nat, ext_addr_list_node); - build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); - } -+ const char *address_mode = smap_get( -+ &op->nbrp->ipv6_ra_configs, "address_mode"); -+ + if (!address_mode) { + return; + } @@ -8487,11 +13319,7 @@ index 5a3227568..c81e3220c 100644 - if (!od->nbr) { - continue; - } -+ if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic", -+ false)) { -+ copy_ra_to_sb(op, address_mode); -+ } - +- - /* Packets are allowed by default. 
*/ - ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); @@ -8500,54 +13328,48 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); -+ ds_clear(match); -+ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs", -+ op->json_key); -+ ds_clear(actions); - +- - /* Send the IPv6 NS packets to next table. When ovn-controller - * generates IPv6 NS (for the action - nd_ns{}), the injected - * packet would go through conntrack - which is not required. */ - ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); -+ const char *mtu_s = smap_get( -+ &op->nbrp->ipv6_ra_configs, "mtu"); - +- - /* NAT rules are only valid on Gateway routers and routers with - * l3dgw_port (router has a port with gateway chassis - * specified). */ - if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { - continue; - } -+ /* As per RFC 2460, 1280 is minimum IPv6 MTU. */ -+ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? 
atoi(mtu_s) : 0; ++ if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic", ++ false)) { ++ copy_ra_to_sb(op, address_mode); ++ } - struct sset nat_entries = SSET_INITIALIZER(&nat_entries); -+ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts(" -+ "addr_mode = \"%s\", slla = %s", -+ address_mode, op->lrp_networks.ea_s); -+ if (mtu > 0) { -+ ds_put_format(actions, ", mtu = %u", mtu); -+ } ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs", ++ op->json_key); ++ ds_clear(actions); - bool dnat_force_snat_ip = - !lport_addresses_is_empty(&od->dnat_force_snat_addrs); - bool lb_force_snat_ip = - !lport_addresses_is_empty(&od->lb_force_snat_addrs); -+ const char *prf = smap_get_def( -+ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM"); -+ if (strcmp(prf, "MEDIUM")) { -+ ds_put_format(actions, ", router_preference = \"%s\"", prf); -+ } ++ const char *mtu_s = smap_get( ++ &op->nbrp->ipv6_ra_configs, "mtu"); - for (int i = 0; i < od->nbr->n_nat; i++) { - const struct nbrec_nat *nat; -+ bool add_rs_response_flow = false; ++ /* As per RFC 2460, 1280 is minimum IPv6 MTU. */ ++ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? 
atoi(mtu_s) : 0; - nat = od->nbr->nat[i]; -+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { -+ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { -+ continue; -+ } ++ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts(" ++ "addr_mode = \"%s\", slla = %s", ++ address_mode, op->lrp_networks.ea_s); ++ if (mtu > 0) { ++ ds_put_format(actions, ", mtu = %u", mtu); ++ } - ovs_be32 ip, mask; - struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; @@ -8557,9 +13379,11 @@ index 5a3227568..c81e3220c 100644 - nat->allowed_ext_ips; - struct nbrec_address_set *exempted_ext_ips = - nat->exempted_ext_ips; -+ ds_put_format(actions, ", prefix = %s/%u", -+ op->lrp_networks.ipv6_addrs[i].network_s, -+ op->lrp_networks.ipv6_addrs[i].plen); ++ const char *prf = smap_get_def( ++ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM"); ++ if (strcmp(prf, "MEDIUM")) { ++ ds_put_format(actions, ", router_preference = \"%s\"", prf); ++ } - if (allowed_ext_ips && exempted_ext_ips) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); @@ -8568,8 +13392,7 @@ index 5a3227568..c81e3220c 100644 - UUID_ARGS(&(nat->header_.uuid))); - continue; - } -+ add_rs_response_flow = true; -+ } ++ bool add_rs_response_flow = false; - char *error = ip_parse_masked(nat->external_ip, &ip, &mask); - if (error || mask != OVS_BE32_MAX) { @@ -8589,16 +13412,7 @@ index 5a3227568..c81e3220c 100644 - * as IPv6. */ - is_v6 = true; - } -+ if (add_rs_response_flow) { -+ ds_put_cstr(actions, "); next;"); -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, -+ 50, ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); -+ ds_clear(actions); -+ ds_clear(match); -+ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && " -+ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key); - +- - /* Check the validity of nat->logical_ip. 'logical_ip' can - * be a subnet when the type is "snat". 
*/ - int cidr_bits; @@ -8633,22 +13447,10 @@ index 5a3227568..c81e3220c 100644 - continue; - } - } -+ char ip6_str[INET6_ADDRSTRLEN + 1]; -+ struct in6_addr lla; -+ in6_generate_lla(op->lrp_networks.ea, &lla); -+ memset(ip6_str, 0, sizeof(ip6_str)); -+ ipv6_string_mapped(ip6_str, &lla); -+ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; " -+ "ip6.dst = ip6.src; ip6.src = %s; " -+ "outport = inport; flags.loopback = 1; " -+ "output;", -+ op->lrp_networks.ea_s, ip6_str); -+ ovn_lflow_add_with_hint(lflows, op->od, -+ S_ROUTER_IN_ND_RA_RESPONSE, 50, -+ ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); -+ } -+} ++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { ++ continue; ++ } - /* For distributed router NAT, determine whether this NAT rule - * satisfies the conditions for distributed NAT processing. */ @@ -8666,16 +13468,9 @@ index 5a3227568..c81e3220c 100644 - continue; - } - } -+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS -+ * responder, by default goto next. (priority 0). */ -+static void -+build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows) -+{ -+ if (od->nbr) { -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); -+ } -+} ++ ds_put_format(actions, ", prefix = %s/%u", ++ op->lrp_networks.ipv6_addrs[i].network_s, ++ op->lrp_networks.ipv6_addrs[i].plen); - /* Ingress UNSNAT table: It is for already established connections' - * reverse traffic. i.e., SNAT has already been done in egress @@ -8701,41 +13496,14 @@ index 5a3227568..c81e3220c 100644 - } else { - ds_put_cstr(&actions, "ct_snat;"); - } -+/* Logical router ingress table IP_ROUTING : IP Routing. -+ * -+ * A packet that arrives at this table is an IP packet that should be -+ * routed to the address in 'ip[46].dst'. 
-+ * -+ * For regular routes without ECMP, table IP_ROUTING sets outport to the -+ * correct output port, eth.src to the output port's MAC address, and -+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address -+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and -+ * advances to the next table. -+ * -+ * For ECMP routes, i.e. multiple routes with same policy and prefix, table -+ * IP_ROUTING remembers ECMP group id and selects a member id, and advances -+ * to table IP_ROUTING_ECMP, which sets outport, eth.src and -+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member. -+ */ -+static void -+build_ip_routing_flows_for_lrouter_port( -+ struct ovn_port *op, struct hmap *lflows) -+{ -+ if (op->nbrp) { - +- - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, - 90, ds_cstr(&match), - ds_cstr(&actions), - &nat->header_); - } else { - /* Distributed router. */ -+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { -+ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, -+ op->lrp_networks.ipv4_addrs[i].network_s, -+ op->lrp_networks.ipv4_addrs[i].plen, NULL, false, -+ &op->nbrp->header_); -+ } - +- - /* Traffic received on l3dgw_port is subject to NAT. */ - ds_clear(&match); - ds_clear(&actions); @@ -8750,75 +13518,22 @@ index 5a3227568..c81e3220c 100644 - ds_put_format(&match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } -+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { -+ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, -+ op->lrp_networks.ipv6_addrs[i].network_s, -+ op->lrp_networks.ipv6_addrs[i].plen, NULL, false, -+ &op->nbrp->header_); -+ } -+ } -+} - +- - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(&actions, "ip%s.dst=%s; next;", - is_v6 ? 
"6" : "4", nat->logical_ip); - } else { - ds_put_cstr(&actions, "ct_snat;"); - } -+static void -+build_static_route_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows, -+ struct hmap *ports, struct hmap *bfd_connections) -+{ -+ if (od->nbr) { -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150, -+ REG_ECMP_GROUP_ID" == 0", "next;"); - +- - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, - 100, - ds_cstr(&match), ds_cstr(&actions), - &nat->header_); -+ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups); -+ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes); -+ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes); -+ struct ecmp_groups_node *group; -+ for (int i = 0; i < od->nbr->n_static_routes; i++) { -+ struct parsed_route *route = -+ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i], -+ bfd_connections); -+ if (!route) { -+ continue; -+ } -+ group = ecmp_groups_find(&ecmp_groups, route); -+ if (group) { -+ ecmp_groups_add_route(group, route); -+ } else { -+ const struct parsed_route *existed_route = -+ unique_routes_remove(&unique_routes, route); -+ if (existed_route) { -+ group = ecmp_groups_add(&ecmp_groups, existed_route); -+ if (group) { -+ ecmp_groups_add_route(group, route); -+ } -+ } else { -+ unique_routes_add(&unique_routes, route); - } - } -+ } -+ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { -+ /* add a flow in IP_ROUTING, and one flow for each member in -+ * IP_ROUTING_ECMP. 
*/ -+ build_ecmp_route_flow(lflows, od, ports, group); -+ } -+ const struct unique_routes_node *ur; -+ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { -+ build_static_route_flow(lflows, od, ports, ur->route); -+ } -+ ecmp_groups_destroy(&ecmp_groups); -+ unique_routes_destroy(&unique_routes); -+ parsed_routes_destroy(&parsed_routes); +- } +- } ++ add_rs_response_flow = true; + } -+} - /* Ingress DNAT table: Packets enter the pipeline with destination - * IP address that needs to be DNATted from a external IP address @@ -8839,15 +13554,15 @@ index 5a3227568..c81e3220c 100644 - lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, - is_v6, true, mask); - } -+/* IP Multicast lookup. Here we set the output port, adjust TTL and -+ * advance to next table (priority 500). -+ */ -+static void -+build_mcast_lookup_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows, -+ struct ds *match, struct ds *actions) -+{ -+ if (od->nbr) { ++ if (add_rs_response_flow) { ++ ds_put_cstr(actions, "); next;"); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, ++ 50, ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ ds_clear(actions); ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && " ++ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key); - if (dnat_force_snat_ip) { - /* Indicate to the future tables that a DNAT has taken @@ -8856,14 +13571,22 @@ index 5a3227568..c81e3220c 100644 - ds_put_format(&actions, - "flags.force_snat_for_dnat = 1; "); - } -+ /* Drop IPv6 multicast traffic that shouldn't be forwarded, -+ * i.e., router solicitation and router advertisement. 
-+ */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550, -+ "nd_rs || nd_ra", "drop;"); -+ if (!od->mcast_info.rtr.relay) { -+ return; -+ } ++ char ip6_str[INET6_ADDRSTRLEN + 1]; ++ struct in6_addr lla; ++ in6_generate_lla(op->lrp_networks.ea, &lla); ++ memset(ip6_str, 0, sizeof(ip6_str)); ++ ipv6_string_mapped(ip6_str, &lla); ++ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; " ++ "ip6.dst = ip6.src; ip6.src = %s; " ++ "outport = inport; flags.loopback = 1; " ++ "output;", ++ op->lrp_networks.ea_s, ip6_str); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_ROUTER_IN_ND_RA_RESPONSE, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++} - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(&actions, "flags.loopback = 1; " @@ -8872,7 +13595,16 @@ index 5a3227568..c81e3220c 100644 - } else { - ds_put_format(&actions, "flags.loopback = 1; " - "ct_dnat(%s", nat->logical_ip); -+ struct ovn_igmp_group *igmp_group; ++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS ++ * responder, by default goto next. (priority 0). 
*/ ++static void ++build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows) ++{ ++ if (od->nbr) { ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); ++ } ++} - if (nat->external_port_range[0]) { - ds_put_format(&actions, ",%s", @@ -8880,52 +13612,39 @@ index 5a3227568..c81e3220c 100644 - } - ds_put_format(&actions, ");"); - } -+ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { -+ ds_clear(match); -+ ds_clear(actions); -+ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { -+ ds_put_format(match, "ip4 && ip4.dst == %s ", -+ igmp_group->mcgroup.name); -+ } else { -+ ds_put_format(match, "ip6 && ip6.dst == %s ", -+ igmp_group->mcgroup.name); -+ } -+ if (od->mcast_info.rtr.flood_static) { -+ ds_put_cstr(actions, -+ "clone { " -+ "outport = \""MC_STATIC"\"; " -+ "ip.ttl--; " -+ "next; " -+ "};"); -+ } -+ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;", -+ igmp_group->mcgroup.name); -+ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500, -+ ds_cstr(match), ds_cstr(actions)); -+ } ++/* Logical router ingress table IP_ROUTING : IP Routing. ++ * ++ * A packet that arrives at this table is an IP packet that should be ++ * routed to the address in 'ip[46].dst'. ++ * ++ * For regular routes without ECMP, table IP_ROUTING sets outport to the ++ * correct output port, eth.src to the output port's MAC address, and ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address ++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and ++ * advances to the next table. ++ * ++ * For ECMP routes, i.e. multiple routes with same policy and prefix, table ++ * IP_ROUTING remembers ECMP group id and selects a member id, and advances ++ * to table IP_ROUTING_ECMP, which sets outport, eth.src and ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member. 
++ */ ++static void ++build_ip_routing_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows) ++{ ++ if (op->nbrp) { - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, - ds_cstr(&match), ds_cstr(&actions), - &nat->header_); - } else { - /* Distributed router. */ -+ /* If needed, flood unregistered multicast on statically configured -+ * ports. Otherwise drop any multicast traffic. -+ */ -+ if (od->mcast_info.rtr.flood_static) { -+ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, -+ "ip4.mcast || ip6.mcast", -+ "clone { " -+ "outport = \""MC_STATIC"\"; " -+ "ip.ttl--; " -+ "next; " -+ "};"); -+ } else { -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, -+ "ip4.mcast || ip6.mcast", "drop;"); ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen, NULL, false, ++ &op->nbrp->header_); + } -+ } -+} - /* Traffic received on l3dgw_port is subject to NAT. */ - ds_clear(&match); @@ -8945,27 +13664,14 @@ index 5a3227568..c81e3220c 100644 - lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, - is_v6, true, mask); - } -+/* Logical router ingress table POLICY: Policy. -+ * -+ * A packet that arrives at this table is an IP packet that should be -+ * permitted/denied/rerouted to the address in the rule's nexthop. -+ * This table sets outport to the correct out_port, -+ * eth.src to the output port's MAC address, -+ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address -+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and -+ * advances to the next table for ARP/ND resolution. */ -+static void -+build_ingress_policy_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows, -+ struct hmap *ports) -+{ -+ if (od->nbr) { -+ /* This is a catch-all rule. 
It has the lowest priority (0) -+ * does a match-all("1") and pass-through (next) */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", -+ REG_ECMP_GROUP_ID" = 0; next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150, -+ REG_ECMP_GROUP_ID" == 0", "next;"); ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, ++ op->lrp_networks.ipv6_addrs[i].network_s, ++ op->lrp_networks.ipv6_addrs[i].plen, NULL, false, ++ &op->nbrp->header_); ++ } ++ } ++} - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(&actions, "ip%s.dst=%s; next;", @@ -8978,30 +13684,31 @@ index 5a3227568..c81e3220c 100644 - } - ds_put_format(&actions, ");"); - } -+ /* Convert routing policies to flows. */ -+ uint16_t ecmp_group_id = 1; -+ for (int i = 0; i < od->nbr->n_policies; i++) { -+ const struct nbrec_logical_router_policy *rule -+ = od->nbr->policies[i]; -+ bool is_ecmp_reroute = -+ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1); ++static void ++build_static_route_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct hmap *ports, struct hmap *bfd_connections) ++{ ++ if (od->nbr) { ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150, ++ REG_ECMP_GROUP_ID" == 0", "next;"); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, - ds_cstr(&match), ds_cstr(&actions), - &nat->header_); - } -+ if (is_ecmp_reroute) { -+ build_ecmp_routing_policy_flows(lflows, od, ports, rule, -+ ecmp_group_id); -+ ecmp_group_id++; -+ } else { -+ build_routing_policy_flow(lflows, od, ports, rule, -+ &rule->header_); ++ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups); ++ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes); ++ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes); ++ struct ecmp_groups_node *group; ++ for (int i = 0; i < od->nbr->n_static_routes; i++) { ++ struct parsed_route *route = ++ 
parsed_routes_add(&parsed_routes, od->nbr->static_routes[i], ++ bfd_connections); ++ if (!route) { ++ continue; } -+ } -+ } -+} - +- - /* ARP resolve for NAT IPs. */ - if (od->l3dgw_port) { - if (!strcmp(nat->type, "snat")) { @@ -9014,17 +13721,7 @@ index 5a3227568..c81e3220c 100644 - 120, ds_cstr(&match), "next;", - &nat->header_); - } -+/* Local router ingress table ARP_RESOLVE: ARP Resolution. */ -+static void -+build_arp_resolve_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows) -+{ -+ if (od->nbr) { -+ /* Multicast packets already have the outport set so just advance to -+ * next table (priority 500). */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500, -+ "ip4.mcast || ip6.mcast", "next;"); - +- - if (!sset_contains(&nat_entries, nat->external_ip)) { - ds_clear(&match); - ds_put_format( @@ -9044,15 +13741,16 @@ index 5a3227568..c81e3220c 100644 - &nat->header_); - sset_add(&nat_entries, nat->external_ip); - } -- } else { ++ group = ecmp_groups_find(&ecmp_groups, route); ++ if (group) { ++ ecmp_groups_add_route(group, route); + } else { - /* Add the NAT external_ip to the nat_entries even for - * gateway routers. This is required for adding load balancer - * flows.*/ - sset_add(&nat_entries, nat->external_ip); - } -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", -+ "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;"); - +- - /* Egress UNDNAT table: It is for already established connections' - * reverse traffic. 
i.e., DNAT has already been done in ingress - * pipeline and now the packet has entered the egress pipeline as @@ -9078,59 +13776,21 @@ index 5a3227568..c81e3220c 100644 - ds_clear(&actions); - if (distributed) { - ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", -- ETH_ADDR_ARGS(mac)); -- } -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", -+ "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;"); -+ } -+} - +- ETH_ADDR_ARGS(mac)); +- } +- - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(&actions, "ip%s.src=%s; next;", - is_v6 ? "6" : "4", nat->external_ip); - } else { - ds_put_format(&actions, "ct_dnat;"); - } -+/* Local router ingress table ARP_RESOLVE: ARP Resolution. -+ * -+ * Any unicast packet that reaches this table is an IP packet whose -+ * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 -+ * (ip4.dst/ipv6.dst is the final destination). -+ * This table resolves the IP address in -+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and -+ * an Ethernet address in eth.dst. -+ */ -+static void -+build_arp_resolve_flows_for_lrouter_port( -+ struct ovn_port *op, struct hmap *lflows, -+ struct hmap *ports, -+ struct ds *match, struct ds *actions) -+{ -+ if (op->nbsp && !lsp_is_enabled(op->nbsp)) { -+ return; -+ } - +- - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, - ds_cstr(&match), ds_cstr(&actions), - &nat->header_); - } -+ if (op->nbrp) { -+ /* This is a logical router port. If next-hop IP address in -+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this -+ * router port, then the packet is intended to eventually be sent -+ * to this logical port. Set the destination mac address using -+ * this port's mac address. -+ * -+ * The packet is still in peer's logical pipeline. So the match -+ * should be on peer's outport. 
*/ -+ if (op->peer && op->nbrp->peer) { -+ if (op->lrp_networks.n_ipv4_addrs) { -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV4 "== ", -+ op->peer->json_key); -+ op_put_v4_networks(match, op, false); - +- - /* Egress SNAT table: Packets enter the egress pipeline with - * source ip address that needs to be SNATted to a external ip - * address. */ @@ -9143,66 +13803,26 @@ index 5a3227568..c81e3220c 100644 - is_v6 ? "6" : "4", - nat->logical_ip); - ds_clear(&actions); -+ ds_clear(actions); -+ ds_put_format(actions, "eth.dst = %s; next;", -+ op->lrp_networks.ea_s); -+ ovn_lflow_add_with_hint(lflows, op->peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, -+ ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); -+ } - +- - if (allowed_ext_ips || exempted_ext_ips) { - lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, - is_v6, false, mask); - } -+ if (op->lrp_networks.n_ipv6_addrs) { -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV6 " == ", -+ op->peer->json_key); -+ op_put_v6_networks(match, op); - +- - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(&actions, "ip%s.src=%s; next;", - is_v6 ? 
"6" : "4", nat->external_ip); - } else { - ds_put_format(&actions, "ct_snat(%s", - nat->external_ip); -+ ds_clear(actions); -+ ds_put_format(actions, "eth.dst = %s; next;", -+ op->lrp_networks.ea_s); -+ ovn_lflow_add_with_hint(lflows, op->peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, -+ ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); -+ } -+ } - +- - if (nat->external_port_range[0]) { - ds_put_format(&actions, ",%s", - nat->external_port_range); - } - ds_put_format(&actions, ");"); - } -+ if (!op->derived && op->od->l3redirect_port) { -+ const char *redirect_type = smap_get(&op->nbrp->options, -+ "redirect-type"); -+ if (redirect_type && !strcasecmp(redirect_type, "bridged")) { -+ /* Packet is on a non gateway chassis and -+ * has an unresolved ARP on a network behind gateway -+ * chassis attached router port. Since, redirect type -+ * is "bridged", instead of calling "get_arp" -+ * on this node, we will redirect the packet to gateway -+ * chassis, by setting destination mac router port mac.*/ -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ "!is_chassis_resident(%s)", op->json_key, -+ op->od->l3redirect_port->json_key); -+ ds_clear(actions); -+ ds_put_format(actions, "eth.dst = %s; next;", -+ op->lrp_networks.ea_s); - +- - /* The priority here is calculated such that the - * nat->logical_ip with the longest mask gets a higher - * priority. */ @@ -9212,13 +13832,7 @@ index 5a3227568..c81e3220c 100644 - &nat->header_); - } else { - uint16_t priority = cidr_bits + 1; -+ ovn_lflow_add_with_hint(lflows, op->od, -+ S_ROUTER_IN_ARP_RESOLVE, 50, -+ ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); -+ } -+ } - +- - /* Distributed router. */ - ds_clear(&match); - ds_put_format(&match, "ip && ip%s.src == %s" @@ -9234,48 +13848,17 @@ index 5a3227568..c81e3220c 100644 - od->l3redirect_port->json_key); - } - ds_clear(&actions); -+ /* Drop IP traffic destined to router owned IPs. 
Part of it is dropped -+ * in stage "lr_in_ip_input" but traffic that could have been unSNATed -+ * but didn't match any existing session might still end up here. -+ * -+ * Priority 1. -+ */ -+ build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true, -+ lflows); -+ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) -+ && strcmp(op->nbsp->type, "virtual")) { -+ /* This is a logical switch port that backs a VM or a container. -+ * Extract its addresses. For each of the address, go through all -+ * the router ports attached to the switch (to which this port -+ * connects) and if the address in question is reachable from the -+ * router port, add an ARP/ND entry in that router's pipeline. */ - +- - if (allowed_ext_ips || exempted_ext_ips) { - lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, - is_v6, false, mask); -+ for (size_t i = 0; i < op->n_lsp_addrs; i++) { -+ const char *ea_s = op->lsp_addrs[i].ea_s; -+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { -+ const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; -+ for (size_t k = 0; k < op->od->n_router_ports; k++) { -+ /* Get the Logical_Router_Port that the -+ * Logical_Switch_Port is connected to, as -+ * 'peer'. */ -+ const char *peer_name = smap_get( -+ &op->od->router_ports[k]->nbsp->options, -+ "router-port"); -+ if (!peer_name) { -+ continue; - } - +- } +- - if (distributed) { - ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", - ETH_ADDR_ARGS(mac)); -+ struct ovn_port *peer = ovn_port_find(ports, peer_name); -+ if (!peer || !peer->nbrp) { -+ continue; - } - +- } +- - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(&actions, "ip%s.src=%s; next;", - is_v6 ? 
"6" : "4", nat->external_ip); @@ -9287,10 +13870,14 @@ index 5a3227568..c81e3220c 100644 - nat->external_port_range); - } - ds_put_format(&actions, ");"); -+ if (!find_lrp_member_ip(peer, ip_s)) { -+ continue; ++ const struct parsed_route *existed_route = ++ unique_routes_remove(&unique_routes, route); ++ if (existed_route) { ++ group = ecmp_groups_add(&ecmp_groups, existed_route); ++ if (group) { ++ ecmp_groups_add_route(group, route); } - +- - /* The priority here is calculated such that the - * nat->logical_ip with the longest mask gets a higher - * priority. */ @@ -9298,21 +13885,9 @@ index 5a3227568..c81e3220c 100644 - priority, ds_cstr(&match), - ds_cstr(&actions), - &nat->header_); -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV4 " == %s", -+ peer->json_key, ip_s); -+ -+ ds_clear(actions); -+ ds_put_format(actions, "eth.dst = %s; next;", ea_s); -+ ovn_lflow_add_with_hint(lflows, peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, -+ ds_cstr(match), -+ ds_cstr(actions), -+ &op->nbsp->header_); - } - } - +- } +- } +- - /* Logical router ingress table 0: - * For NAT on a distributed router, add rules allowing - * ingress traffic with eth.dst matching nat->external_mac @@ -9326,19 +13901,7 @@ index 5a3227568..c81e3220c 100644 - ds_clear(&actions); - ds_put_format(&actions, REG_INPORT_ETH_ADDR " = %s; next;", - od->l3dgw_port->lrp_networks.ea_s); -+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { -+ const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; -+ for (size_t k = 0; k < op->od->n_router_ports; k++) { -+ /* Get the Logical_Router_Port that the -+ * Logical_Switch_Port is connected to, as -+ * 'peer'. 
*/ -+ const char *peer_name = smap_get( -+ &op->od->router_ports[k]->nbsp->options, -+ "router-port"); -+ if (!peer_name) { -+ continue; -+ } - +- - ds_clear(&match); - ds_put_format(&match, - "eth.dst == "ETH_ADDR_FMT" && inport == %s" @@ -9350,11 +13913,7 @@ index 5a3227568..c81e3220c 100644 - ds_cstr(&match), ds_cstr(&actions), - &nat->header_); - } -+ struct ovn_port *peer = ovn_port_find(ports, peer_name); -+ if (!peer || !peer->nbrp) { -+ continue; -+ } - +- - /* Ingress Gateway Redirect Table: For NAT on a distributed - * router, add flows that are specific to a NAT rule. These - * flows indicate the presence of an applicable NAT rule that @@ -9380,10 +13939,7 @@ index 5a3227568..c81e3220c 100644 - 100, ds_cstr(&match), - ds_cstr(&actions), &nat->header_); - } -+ if (!find_lrp_member_ip(peer, ip_s)) { -+ continue; -+ } - +- - /* Egress Loopback table: For NAT on a distributed router. - * If packets in the egress pipeline on the distributed - * gateway port have ip.dst matching a NAT external IP, then @@ -9399,15 +13955,11 @@ index 5a3227568..c81e3220c 100644 - if (!distributed) { - ds_put_format(&match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); -- } else { + } else { - ds_put_format(&match, " && is_chassis_resident(\"%s\")", - nat->logical_port); - } -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV6 " == %s", -+ peer->json_key, ip_s); - +- - ds_clear(&actions); - ds_put_format(&actions, - "clone { ct_clear; " @@ -9415,33 +13967,16 @@ index 5a3227568..c81e3220c 100644 - "flags = 0; flags.loopback = 1; "); - for (int j = 0; j < MFF_N_LOG_REGS; j++) { - ds_put_format(&actions, "reg%d = 0; ", j); -+ ds_clear(actions); -+ ds_put_format(actions, "eth.dst = %s; next;", ea_s); -+ ovn_lflow_add_with_hint(lflows, peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, -+ ds_cstr(match), -+ ds_cstr(actions), -+ &op->nbsp->header_); - } +- } - ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; " - "next(pipeline=ingress, 
table=%d); };", - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, - ds_cstr(&match), ds_cstr(&actions), - &nat->header_); - } - } -+ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) -+ && !strcmp(op->nbsp->type, "virtual")) { -+ /* This is a virtual port. Add ARP replies for the virtual ip with -+ * the mac of the present active virtual parent. -+ * If the logical port doesn't have virtual parent set in -+ * Port_Binding table, then add the flow to set eth.dst to -+ * 00:00:00:00:00:00 and advance to next table so that ARP is -+ * resolved by router pipeline using the arp{} action. -+ * The MAC_Binding entry for the virtual ip might be invalid. */ -+ ovs_be32 ip; - +- } +- } +- - /* Handle force SNAT options set in the gateway router. */ - if (!od->l3dgw_port) { - if (dnat_force_snat_ip) { @@ -9464,8 +13999,9 @@ index 5a3227568..c81e3220c 100644 - if (od->lb_force_snat_addrs.n_ipv6_addrs) { - build_lrouter_force_snat_flows(lflows, od, "6", - od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); -- } -- } ++ unique_routes_add(&unique_routes, route); + } + } - - /* For gateway router, re-circulate every packet through - * the DNAT zone. This helps with the following. @@ -9478,73 +14014,35 @@ index 5a3227568..c81e3220c 100644 - * we can do it here, saving a future re-circulation. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, - "ip", "flags.loopback = 1; ct_dnat;"); -+ const char *vip = smap_get(&op->nbsp->options, -+ "virtual-ip"); -+ const char *virtual_parents = smap_get(&op->nbsp->options, -+ "virtual-parents"); -+ if (!vip || !virtual_parents || -+ !ip_parse(vip, &ip) || !op->sb) { -+ return; } - +- - /* Load balancing and packet defrag are only valid on - * Gateway routers or router with gateway port. 
*/ - if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { - sset_destroy(&nat_entries); - continue; -- } -+ if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] || -+ !op->sb->chassis) { -+ /* The virtual port is not claimed yet. */ -+ for (size_t i = 0; i < op->od->n_router_ports; i++) { -+ const char *peer_name = smap_get( -+ &op->od->router_ports[i]->nbsp->options, -+ "router-port"); -+ if (!peer_name) { -+ continue; -+ } - ++ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { ++ /* add a flow in IP_ROUTING, and one flow for each member in ++ * IP_ROUTING_ECMP. */ ++ build_ecmp_route_flow(lflows, od, ports, group); + } +- - /* A set to hold all ips that need defragmentation and tracking. */ - struct sset all_ips = SSET_INITIALIZER(&all_ips); -+ struct ovn_port *peer = ovn_port_find(ports, peer_name); -+ if (!peer || !peer->nbrp) { -+ continue; -+ } - +- - for (int i = 0; i < od->nbr->n_load_balancer; i++) { - struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; - struct ovn_northd_lb *lb = - ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); - ovs_assert(lb); -+ if (find_lrp_member_ip(peer, vip)) { -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV4 " == %s", -+ peer->json_key, vip); - +- - for (size_t j = 0; j < lb->n_vips; j++) { - struct ovn_lb_vip *lb_vip = &lb->vips[j]; - struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; - ds_clear(&actions); - build_lb_vip_ct_lb_actions(lb_vip, lb_vip_nb, &actions, - lb->selection_fields); -+ const char *arp_actions = -+ "eth.dst = 00:00:00:00:00:00; next;"; -+ ovn_lflow_add_with_hint(lflows, peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, -+ ds_cstr(match), -+ arp_actions, -+ &op->nbsp->header_); -+ break; -+ } -+ } -+ } else { -+ struct ovn_port *vp = -+ ovn_port_find(ports, op->sb->virtual_parent); -+ if (!vp || !vp->nbsp) { -+ return; -+ } - +- - if (!sset_contains(&all_ips, lb_vip->vip_str)) { - sset_add(&all_ips, lb_vip->vip_str); - /* If there are any load 
balancing rules, we should send @@ -9562,24 +14060,12 @@ index 5a3227568..c81e3220c 100644 - } else { - ds_put_format(&match, "ip && ip6.dst == %s", - lb_vip->vip_str); -+ for (size_t i = 0; i < vp->n_lsp_addrs; i++) { -+ bool found_vip_network = false; -+ const char *ea_s = vp->lsp_addrs[i].ea_s; -+ for (size_t j = 0; j < vp->od->n_router_ports; j++) { -+ /* Get the Logical_Router_Port that the -+ * Logical_Switch_Port is connected to, as -+ * 'peer'. */ -+ const char *peer_name = smap_get( -+ &vp->od->router_ports[j]->nbsp->options, -+ "router-port"); -+ if (!peer_name) { -+ continue; - } +- } - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, - 100, ds_cstr(&match), "ct_next;", - &nb_lb->header_); - } - +- - /* Higher priority rules are added for load-balancing in DNAT - * table. For every match (on a VIP[:port]), we add two flows - * via add_router_lb_flow(). One flow is for specific matching @@ -9593,80 +14079,61 @@ index 5a3227568..c81e3220c 100644 - ds_put_format(&match, "ip && ip6.dst == %s", - lb_vip->vip_str); - } -+ struct ovn_port *peer = -+ ovn_port_find(ports, peer_name); -+ if (!peer || !peer->nbrp) { -+ continue; -+ } - +- - int prio = 110; - bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); - bool is_sctp = nullable_string_is_equal(nb_lb->protocol, - "sctp"); - const char *proto = is_udp ? "udp" : is_sctp ? 
"sctp" : "tcp"; -+ if (!find_lrp_member_ip(peer, vip)) { -+ continue; -+ } - +- - if (lb_vip->vip_port) { - ds_put_format(&match, " && %s && %s.dst == %d", proto, - proto, lb_vip->vip_port); - prio = 120; -+ ds_clear(match); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV4 " == %s", -+ peer->json_key, vip); -+ -+ ds_clear(actions); -+ ds_put_format(actions, "eth.dst = %s; next;", ea_s); -+ ovn_lflow_add_with_hint(lflows, peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, -+ ds_cstr(match), -+ ds_cstr(actions), -+ &op->nbsp->header_); -+ found_vip_network = true; -+ break; - } - +- } +- - if (od->l3redirect_port) { - ds_put_format(&match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); -+ if (found_vip_network) { -+ break; - } +- } - add_router_lb_flow(lflows, od, &match, &actions, prio, - lb_force_snat_ip, lb_vip, proto, - nb_lb, meter_groups, &nat_entries); - } +- } ++ const struct unique_routes_node *ur; ++ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { ++ build_static_route_flow(lflows, od, ports, ur->route); } - sset_destroy(&all_ips); - sset_destroy(&nat_entries); -- } ++ ecmp_groups_destroy(&ecmp_groups); ++ unique_routes_destroy(&unique_routes); ++ parsed_routes_destroy(&parsed_routes); + } - - ds_destroy(&match); - ds_destroy(&actions); --} -+ } else if (lsp_is_router(op->nbsp)) { -+ /* This is a logical switch port that connects to a router. */ + } -/* Logical router ingress Table 0: L2 Admission Control - * Generic admission control flows (without inport check). -- */ --static void ++/* IP Multicast lookup. Here we set the output port, adjust TTL and ++ * advance to next table (priority 500). + */ + static void -build_adm_ctrl_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows) --{ -- if (od->nbr) { ++build_mcast_lookup_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct ds *match, struct ds *actions) + { + if (od->nbr) { - /* Logical VLANs not supported. 
- * Broadcast/multicast source address is invalid. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, - "vlan.present || eth.src[40]", "drop;"); - } -} -+ /* The peer of this switch port is the router port for which -+ * we need to add logical flows such that it can resolve -+ * ARP entries for all the other router ports connected to -+ * the switch in question. */ -/* Logical router ingress Table 0: L2 Admission Control - * This table drops packets that the router shouldn’t see at all based @@ -9681,24 +14148,51 @@ index 5a3227568..c81e3220c 100644 - if (!lrport_is_enabled(op->nbrp)) { - /* Drop packets from disabled logical ports (since logical flow - * tables are default-drop). */ -+ const char *peer_name = smap_get(&op->nbsp->options, -+ "router-port"); -+ if (!peer_name) { ++ /* Drop IPv6 multicast traffic that shouldn't be forwarded, ++ * i.e., router solicitation and router advertisement. ++ */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550, ++ "nd_rs || nd_ra", "drop;"); ++ if (!od->mcast_info.rtr.relay) { return; } - if (op->derived) { - /* No ingress packets should be received on a chassisredirect - * port. 
*/ -+ struct ovn_port *peer = ovn_port_find(ports, peer_name); -+ if (!peer || !peer->nbrp) { - return; +- return; ++ struct ovn_igmp_group *igmp_group; ++ ++ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { ++ ds_clear(match); ++ ds_clear(actions); ++ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { ++ ds_put_format(match, "ip4 && ip4.dst == %s ", ++ igmp_group->mcgroup.name); ++ } else { ++ ds_put_format(match, "ip6 && ip6.dst == %s ", ++ igmp_group->mcgroup.name); ++ } ++ if (od->mcast_info.rtr.flood_static) { ++ ds_put_cstr(actions, ++ "clone { " ++ "outport = \""MC_STATIC"\"; " ++ "ip.ttl--; " ++ "next; " ++ "};"); ++ } ++ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;", ++ igmp_group->mcgroup.name); ++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500, ++ ds_cstr(match), ds_cstr(actions)); } - /* Store the ethernet address of the port receiving the packet. - * This will save us from having to match on inport further down in - * the pipeline. -- */ ++ /* If needed, flood unregistered multicast on statically configured ++ * ports. Otherwise drop any multicast traffic. + */ - ds_clear(actions); - ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", - op->lrp_networks.ea_s); @@ -9718,27 +14212,51 @@ index 5a3227568..c81e3220c 100644 - * should only be received on the gateway chassis. 
*/ - ds_put_format(match, " && is_chassis_resident(%s)", - op->od->l3redirect_port->json_key); -+ if (peer->od->nbr && -+ smap_get_bool(&peer->od->nbr->options, -+ "dynamic_neigh_routers", false)) { -+ return; ++ if (od->mcast_info.rtr.flood_static) { ++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, ++ "ip4.mcast || ip6.mcast", ++ "clone { " ++ "outport = \""MC_STATIC"\"; " ++ "ip.ttl--; " ++ "next; " ++ "};"); ++ } else { ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, ++ "ip4.mcast || ip6.mcast", "drop;"); } - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, - ds_cstr(match), ds_cstr(actions), - &op->nbrp->header_); -- } --} -- + } + } + - -/* Logical router ingress Table 1 and 2: Neighbor lookup and learning - * lflows for logical routers. */ --static void ++/* Logical router ingress table POLICY: Policy. ++ * ++ * A packet that arrives at this table is an IP packet that should be ++ * permitted/denied/rerouted to the address in the rule's nexthop. ++ * This table sets outport to the correct out_port, ++ * eth.src to the output port's MAC address, ++ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address ++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and ++ * advances to the next table for ARP/ND resolution. */ + static void -build_neigh_learning_flows_for_lrouter( -- struct ovn_datapath *od, struct hmap *lflows, ++build_ingress_policy_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows, - struct ds *match, struct ds *actions) --{ -- if (od->nbr) { -- ++ struct hmap *ports) + { + if (od->nbr) { ++ /* This is a catch-all rule. It has the lowest priority (0) ++ * does a match-all("1") and pass-through (next) */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", ++ REG_ECMP_GROUP_ID" = 0; next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150, ++ REG_ECMP_GROUP_ID" == 0", "next;"); + - /* Learn MAC bindings from ARP/IPv6 ND. 
- * - * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the @@ -9805,7 +14323,14 @@ index 5a3227568..c81e3220c 100644 - * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1", - REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;"); -- ++ /* Convert routing policies to flows. */ ++ uint16_t ecmp_group_id = 1; ++ for (int i = 0; i < od->nbr->n_policies; i++) { ++ const struct nbrec_logical_router_policy *rule ++ = od->nbr->policies[i]; ++ bool is_ecmp_reroute = ++ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1); + - /* Flows for LEARN_NEIGHBOR. */ - /* Skip Neighbor learning if not required. */ - ds_clear(match); @@ -9814,56 +14339,92 @@ index 5a3227568..c81e3220c 100644 - " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100, - ds_cstr(match), "next;"); -- ++ if (is_ecmp_reroute) { ++ build_ecmp_routing_policy_flows(lflows, od, ports, rule, ++ ecmp_group_id); ++ ecmp_group_id++; ++ } else { ++ build_routing_policy_flow(lflows, od, ports, rule, ++ &rule->header_); ++ } ++ } ++ } ++} + - ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, - "arp", "put_arp(inport, arp.spa, arp.sha); next;"); ++/* Local router ingress table ARP_RESOLVE: ARP Resolution. */ ++static void ++build_arp_resolve_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows) ++{ ++ if (od->nbr) { ++ /* Multicast packets already have the outport set so just advance to ++ * next table (priority 500). 
*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500, ++ "ip4.mcast || ip6.mcast", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, - "nd_na", "put_nd(inport, nd.target, nd.tll); next;"); -- ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", ++ "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;"); + - ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, - "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;"); -- } -+ for (size_t i = 0; i < op->od->n_router_ports; i++) { -+ const char *router_port_name = smap_get( -+ &op->od->router_ports[i]->nbsp->options, -+ "router-port"); -+ struct ovn_port *router_port = ovn_port_find(ports, -+ router_port_name); -+ if (!router_port || !router_port->nbrp) { -+ continue; -+ } - --} -+ /* Skip the router port under consideration. */ -+ if (router_port == peer) { -+ continue; -+ } ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", ++ "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;"); + } +- + } -/* Logical router ingress Table 1: Neighbor lookup lflows - * for logical router ports. */ --static void ++/* Local router ingress table ARP_RESOLVE: ARP Resolution. ++ * ++ * Any unicast packet that reaches this table is an IP packet whose ++ * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 ++ * (ip4.dst/ipv6.dst is the final destination). ++ * This table resolves the IP address in ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and ++ * an Ethernet address in eth.dst. 
++ */ + static void -build_neigh_learning_flows_for_lrouter_port( -- struct ovn_port *op, struct hmap *lflows, -- struct ds *match, struct ds *actions) --{ -- if (op->nbrp) { -+ if (router_port->lrp_networks.n_ipv4_addrs) { ++build_arp_resolve_flows_for_lrouter_port( + struct ovn_port *op, struct hmap *lflows, ++ struct hmap *ports, + struct ds *match, struct ds *actions) + { ++ if (op->nbsp && !lsp_is_enabled(op->nbsp)) { ++ return; ++ } ++ + if (op->nbrp) { ++ /* This is a logical router port. If next-hop IP address in ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this ++ * router port, then the packet is intended to eventually be sent ++ * to this logical port. Set the destination mac address using ++ * this port's mac address. ++ * ++ * The packet is still in peer's logical pipeline. So the match ++ * should be on peer's outport. */ ++ if (op->peer && op->nbrp->peer) { ++ if (op->lrp_networks.n_ipv4_addrs) { + ds_clear(match); + ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV4 " == ", -+ peer->json_key); -+ op_put_v4_networks(match, router_port, false); ++ REG_NEXT_HOP_IPV4 "== ", ++ op->peer->json_key); ++ op_put_v4_networks(match, op, false); - bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options, - "always_learn_from_arp_request", true); + ds_clear(actions); + ds_put_format(actions, "eth.dst = %s; next;", -+ router_port->lrp_networks.ea_s); -+ ovn_lflow_add_with_hint(lflows, peer->od, ++ op->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, op->peer->od, + S_ROUTER_IN_ARP_RESOLVE, 100, + ds_cstr(match), ds_cstr(actions), -+ &op->nbsp->header_); ++ &op->nbrp->header_); + } - /* Check if we need to learn mac-binding from ARP requests. */ @@ -9872,7 +14433,7 @@ index 5a3227568..c81e3220c 100644 - /* ARP request to this address should always get learned, - * so add a priority-110 flow to set - * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. 
*/ -+ if (router_port->lrp_networks.n_ipv6_addrs) { ++ if (op->lrp_networks.n_ipv6_addrs) { ds_clear(match); - ds_put_format(match, - "inport == %s && arp.spa == %s/%u && " @@ -9893,8 +14454,19 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add_with_hint(lflows, op->od, - S_ROUTER_IN_LOOKUP_NEIGHBOR, 110, - ds_cstr(match), actions_s, -- &op->nbrp->header_); -- } ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV6 " == ", ++ op->peer->json_key); ++ op_put_v6_networks(match, op); ++ ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ op->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, op->peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), + &op->nbrp->header_); + } - ds_clear(match); - ds_put_format(match, - "inport == %s && arp.spa == %s/%u && arp.op == 1", @@ -9904,19 +14476,30 @@ index 5a3227568..c81e3220c 100644 - if (op->od->l3dgw_port && op == op->od->l3dgw_port - && op->od->l3redirect_port) { - ds_put_format(match, " && is_chassis_resident(%s)", -- op->od->l3redirect_port->json_key); -+ ds_put_format(match, "outport == %s && " -+ REG_NEXT_HOP_IPV6 " == ", -+ peer->json_key); -+ op_put_v6_networks(match, router_port); ++ } + ++ if (!op->derived && op->od->l3redirect_port) { ++ const char *redirect_type = smap_get(&op->nbrp->options, ++ "redirect-type"); ++ if (redirect_type && !strcasecmp(redirect_type, "bridged")) { ++ /* Packet is on a non gateway chassis and ++ * has an unresolved ARP on a network behind gateway ++ * chassis attached router port. 
Since, redirect type ++ * is "bridged", instead of calling "get_arp" ++ * on this node, we will redirect the packet to gateway ++ * chassis, by setting destination mac router port mac.*/ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ "!is_chassis_resident(%s)", op->json_key, + op->od->l3redirect_port->json_key); + ds_clear(actions); + ds_put_format(actions, "eth.dst = %s; next;", -+ router_port->lrp_networks.ea_s); -+ ovn_lflow_add_with_hint(lflows, peer->od, -+ S_ROUTER_IN_ARP_RESOLVE, 100, ++ op->lrp_networks.ea_s); ++ ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_ROUTER_IN_ARP_RESOLVE, 50, + ds_cstr(match), ds_cstr(actions), -+ &op->nbsp->header_); ++ &op->nbrp->header_); } - ds_clear(actions); - ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT @@ -9929,44 +14512,60 @@ index 5a3227568..c81e3220c 100644 - ds_cstr(match), ds_cstr(actions), - &op->nbrp->header_); } - } -+ - } +- } +-} -/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router - * Adv (RA) options and response. */ -+/* Local router ingress table CHK_PKT_LEN: Check packet length. -+ * -+ * Any IPv4 packet with outport set to the distributed gateway -+ * router port, check the packet length and store the result in the -+ * 'REGBIT_PKT_LARGER' register bit. -+ * -+ * Local router ingress table LARGER_PKTS: Handle larger packets. -+ * -+ * Any IPv4 packet with outport set to the distributed gateway -+ * router port and the 'REGBIT_PKT_LARGER' register bit is set, -+ * generate ICMPv4 packet with type 3 (Destination Unreachable) and -+ * code 4 (Fragmentation needed). 
-+ * */ - static void +-static void -build_ND_RA_flows_for_lrouter_port( - struct ovn_port *op, struct hmap *lflows, -+build_check_pkt_len_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows, -+ struct hmap *ports, - struct ds *match, struct ds *actions) - { +- struct ds *match, struct ds *actions) +-{ - if (!op->nbrp || op->nbrp->peer || !op->peer) { - return; - } -- ++ /* Drop IP traffic destined to router owned IPs. Part of it is dropped ++ * in stage "lr_in_ip_input" but traffic that could have been unSNATed ++ * but didn't match any existing session might still end up here. ++ * ++ * Priority 1. ++ */ ++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true, ++ lflows); ++ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) ++ && strcmp(op->nbsp->type, "virtual")) { ++ /* This is a logical switch port that backs a VM or a container. ++ * Extract its addresses. For each of the address, go through all ++ * the router ports attached to the switch (to which this port ++ * connects) and if the address in question is reachable from the ++ * router port, add an ARP/ND entry in that router's pipeline. */ + - if (!op->lrp_networks.n_ipv6_addrs) { - return; - } -- ++ for (size_t i = 0; i < op->n_lsp_addrs; i++) { ++ const char *ea_s = op->lsp_addrs[i].ea_s; ++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { ++ const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; ++ for (size_t k = 0; k < op->od->n_router_ports; k++) { ++ /* Get the Logical_Router_Port that the ++ * Logical_Switch_Port is connected to, as ++ * 'peer'. 
*/ ++ const char *peer_name = smap_get( ++ &op->od->router_ports[k]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } + - struct smap options; - smap_clone(&options, &op->sb->options); -- ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } + - /* enable IPv6 prefix delegation */ - bool prefix_delegation = smap_get_bool(&op->nbrp->options, - "prefix_delegation", false); @@ -9975,7 +14574,9 @@ index 5a3227568..c81e3220c 100644 - } - smap_add(&options, "ipv6_prefix_delegation", - prefix_delegation ? "true" : "false"); -+ if (od->nbr) { ++ if (!find_lrp_member_ip(peer, ip_s)) { ++ continue; ++ } - bool ipv6_prefix = smap_get_bool(&op->nbrp->options, - "prefix", false); @@ -9985,28 +14586,36 @@ index 5a3227568..c81e3220c 100644 - smap_add(&options, "ipv6_prefix", - ipv6_prefix ? "true" : "false"); - sbrec_port_binding_set_options(op->sb, &options); -+ /* Packets are allowed by default. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1", -+ "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1", -+ "next;"); ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == %s", ++ peer->json_key, ip_s); - smap_destroy(&options); -+ if (od->l3dgw_port && od->l3redirect_port) { -+ int gw_mtu = 0; -+ if (od->l3dgw_port->nbrp) { -+ gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options, -+ "gateway_mtu", 0); -+ } -+ /* Add the flows only if gateway_mtu is configured. 
*/ -+ if (gw_mtu <= 0) { -+ return; ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ } + } - const char *address_mode = smap_get( - &op->nbrp->ipv6_ra_configs, "address_mode"); -+ ds_clear(match); -+ ds_put_format(match, "outport == %s", od->l3dgw_port->json_key); ++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { ++ const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; ++ for (size_t k = 0; k < op->od->n_router_ports; k++) { ++ /* Get the Logical_Router_Port that the ++ * Logical_Switch_Port is connected to, as ++ * 'peer'. */ ++ const char *peer_name = smap_get( ++ &op->od->router_ports[k]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } - if (!address_mode) { - return; @@ -10019,151 +14628,142 @@ index 5a3227568..c81e3220c 100644 - address_mode); - return; - } -+ ds_clear(actions); -+ ds_put_format(actions, -+ REGBIT_PKT_LARGER" = check_pkt_larger(%d);" -+ " next;", gw_mtu + VLAN_ETH_HEADER_LEN); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50, -+ ds_cstr(match), ds_cstr(actions), -+ &od->l3dgw_port->nbrp->header_); ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } - if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic", -- false)) { -- copy_ra_to_sb(op, address_mode); -- } -+ for (size_t i = 0; i < od->nbr->n_ports; i++) { -+ struct ovn_port *rp = ovn_port_find(ports, -+ od->nbr->ports[i]->name); -+ if (!rp || rp == od->l3dgw_port) { -+ continue; -+ } - -- ds_clear(match); -- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs", -- op->json_key); -- ds_clear(actions); -+ if (rp->lrp_networks.ipv4_addrs) { -+ ds_clear(match); -+ ds_put_format(match, "inport == %s && outport == %s" -+ " && ip4 && "REGBIT_PKT_LARGER, -+ rp->json_key, 
od->l3dgw_port->json_key); - -- const char *mtu_s = smap_get( -- &op->nbrp->ipv6_ra_configs, "mtu"); -+ ds_clear(actions); -+ /* Set icmp4.frag_mtu to gw_mtu */ -+ ds_put_format(actions, -+ "icmp4_error {" -+ REGBIT_EGRESS_LOOPBACK" = 1; " -+ "eth.dst = %s; " -+ "ip4.dst = ip4.src; " -+ "ip4.src = %s; " -+ "ip.ttl = 255; " -+ "icmp4.type = 3; /* Destination Unreachable. */ " -+ "icmp4.code = 4; /* Frag Needed and DF was Set. */ " -+ "icmp4.frag_mtu = %d; " -+ "next(pipeline=ingress, table=%d); };", -+ rp->lrp_networks.ea_s, -+ rp->lrp_networks.ipv4_addrs[0].addr_s, -+ gw_mtu, -+ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); -+ ovn_lflow_add_with_hint(lflows, od, -+ S_ROUTER_IN_LARGER_PKTS, 50, -+ ds_cstr(match), ds_cstr(actions), -+ &rp->nbrp->header_); +- false)) { +- copy_ra_to_sb(op, address_mode); +- } ++ if (!find_lrp_member_ip(peer, ip_s)) { ++ continue; ++ } + +- ds_clear(match); +- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs", +- op->json_key); +- ds_clear(actions); ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV6 " == %s", ++ peer->json_key, ip_s); + +- const char *mtu_s = smap_get( +- &op->nbrp->ipv6_ra_configs, "mtu"); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); + } ++ } ++ } ++ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) ++ && !strcmp(op->nbsp->type, "virtual")) { ++ /* This is a virtual port. Add ARP replies for the virtual ip with ++ * the mac of the present active virtual parent. ++ * If the logical port doesn't have virtual parent set in ++ * Port_Binding table, then add the flow to set eth.dst to ++ * 00:00:00:00:00:00 and advance to next table so that ARP is ++ * resolved by router pipeline using the arp{} action. ++ * The MAC_Binding entry for the virtual ip might be invalid. 
*/ ++ ovs_be32 ip; - /* As per RFC 2460, 1280 is minimum IPv6 MTU. */ - uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0; -+ if (rp->lrp_networks.ipv6_addrs) { -+ ds_clear(match); -+ ds_put_format(match, "inport == %s && outport == %s" -+ " && ip6 && "REGBIT_PKT_LARGER, -+ rp->json_key, od->l3dgw_port->json_key); ++ const char *vip = smap_get(&op->nbsp->options, ++ "virtual-ip"); ++ const char *virtual_parents = smap_get(&op->nbsp->options, ++ "virtual-parents"); ++ if (!vip || !virtual_parents || ++ !ip_parse(vip, &ip) || !op->sb) { ++ return; ++ } - ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts(" - "addr_mode = \"%s\", slla = %s", - address_mode, op->lrp_networks.ea_s); - if (mtu > 0) { - ds_put_format(actions, ", mtu = %u", mtu); -+ ds_clear(actions); -+ /* Set icmp6.frag_mtu to gw_mtu */ -+ ds_put_format(actions, -+ "icmp6_error {" -+ REGBIT_EGRESS_LOOPBACK" = 1; " -+ "eth.dst = %s; " -+ "ip6.dst = ip6.src; " -+ "ip6.src = %s; " -+ "ip.ttl = 255; " -+ "icmp6.type = 2; /* Packet Too Big. */ " -+ "icmp6.code = 0; " -+ "icmp6.frag_mtu = %d; " -+ "next(pipeline=ingress, table=%d); };", -+ rp->lrp_networks.ea_s, -+ rp->lrp_networks.ipv6_addrs[0].addr_s, -+ gw_mtu, -+ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); -+ ovn_lflow_add_with_hint(lflows, od, -+ S_ROUTER_IN_LARGER_PKTS, 50, -+ ds_cstr(match), ds_cstr(actions), -+ &rp->nbrp->header_); +- } ++ if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] || ++ !op->sb->chassis) { ++ /* The virtual port is not claimed yet. 
*/ ++ for (size_t i = 0; i < op->od->n_router_ports; i++) { ++ const char *peer_name = smap_get( ++ &op->od->router_ports[i]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; + } -+ } -+ } - } -+} - const char *prf = smap_get_def( - &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM"); - if (strcmp(prf, "MEDIUM")) { - ds_put_format(actions, ", router_preference = \"%s\"", prf); - } -+/* Logical router ingress table GW_REDIRECT: Gateway redirect. -+ * -+ * For traffic with outport equal to the l3dgw_port -+ * on a distributed router, this table redirects a subset -+ * of the traffic to the l3redirect_port which represents -+ * the central instance of the l3dgw_port. -+ */ -+static void -+build_gateway_redirect_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows, -+ struct ds *match, struct ds *actions) -+{ -+ if (od->nbr) { -+ if (od->l3dgw_port && od->l3redirect_port) { -+ const struct ovsdb_idl_row *stage_hint = NULL; ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } - bool add_rs_response_flow = false; -+ if (od->l3dgw_port->nbrp) { -+ stage_hint = &od->l3dgw_port->nbrp->header_; -+ } ++ if (find_lrp_member_ip(peer, vip)) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == %s", ++ peer->json_key, vip); - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { - continue; -+ /* For traffic with outport == l3dgw_port, if the -+ * packet did not match any higher priority redirect -+ * rule, then the traffic is redirected to the central -+ * instance of the l3dgw_port. 
*/ -+ ds_clear(match); -+ ds_put_format(match, "outport == %s", -+ od->l3dgw_port->json_key); -+ ds_clear(actions); -+ ds_put_format(actions, "outport = %s; next;", -+ od->l3redirect_port->json_key); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, -+ ds_cstr(match), ds_cstr(actions), -+ stage_hint); - } +- } ++ const char *arp_actions = ++ "eth.dst = 00:00:00:00:00:00; next;"; ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ arp_actions, ++ &op->nbsp->header_); ++ break; ++ } ++ } ++ } else { ++ struct ovn_port *vp = ++ ovn_port_find(ports, op->sb->virtual_parent); ++ if (!vp || !vp->nbsp) { ++ return; ++ } - ds_put_format(actions, ", prefix = %s/%u", - op->lrp_networks.ipv6_addrs[i].network_s, - op->lrp_networks.ipv6_addrs[i].plen); -- ++ for (size_t i = 0; i < vp->n_lsp_addrs; i++) { ++ bool found_vip_network = false; ++ const char *ea_s = vp->lsp_addrs[i].ea_s; ++ for (size_t j = 0; j < vp->od->n_router_ports; j++) { ++ /* Get the Logical_Router_Port that the ++ * Logical_Switch_Port is connected to, as ++ * 'peer'. 
*/ ++ const char *peer_name = smap_get( ++ &vp->od->router_ports[j]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } + - add_rs_response_flow = true; - } -- ++ struct ovn_port *peer = ++ ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } + - if (add_rs_response_flow) { - ds_put_cstr(actions, "); next;"); - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, @@ -10173,7 +14773,10 @@ index 5a3227568..c81e3220c 100644 - ds_clear(match); - ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && " - "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key); -- ++ if (!find_lrp_member_ip(peer, vip)) { ++ continue; ++ } + - char ip6_str[INET6_ADDRSTRLEN + 1]; - struct in6_addr lla; - in6_generate_lla(op->lrp_networks.ea, &lla); @@ -10188,84 +14791,33 @@ index 5a3227568..c81e3220c 100644 - S_ROUTER_IN_ND_RA_RESPONSE, 50, - ds_cstr(match), ds_cstr(actions), - &op->nbrp->header_); -+ /* Packets are allowed by default. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); - } - } +- } +-} ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == %s", ++ peer->json_key, vip); -/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS - * responder, by default goto next. (priority 0). */ -+/* Local router ingress table ARP_REQUEST: ARP request. -+ * -+ * In the common case where the Ethernet destination has been resolved, -+ * this table outputs the packet (priority 0). Otherwise, it composes -+ * and sends an ARP/IPv6 NA request (priority 100). 
*/ - static void +-static void -build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows) -+build_arp_request_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows, -+ struct ds *match, struct ds *actions) - { - if (od->nbr) { +-{ +- if (od->nbr) { - ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); -+ for (int i = 0; i < od->nbr->n_static_routes; i++) { -+ const struct nbrec_logical_router_static_route *route; -+ -+ route = od->nbr->static_routes[i]; -+ struct in6_addr gw_ip6; -+ unsigned int plen; -+ char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen); -+ if (error || plen != 128) { -+ free(error); -+ continue; -+ } -+ -+ ds_clear(match); -+ ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && " -+ "ip6 && " REG_NEXT_HOP_IPV6 " == %s", -+ route->nexthop); -+ struct in6_addr sn_addr; -+ struct eth_addr eth_dst; -+ in6_addr_solicited_node(&sn_addr, &gw_ip6); -+ ipv6_multicast_to_ethernet(ð_dst, &sn_addr); -+ -+ char sn_addr_s[INET6_ADDRSTRLEN + 1]; -+ ipv6_string_mapped(sn_addr_s, &sn_addr); -+ -+ ds_clear(actions); -+ ds_put_format(actions, -+ "nd_ns { " -+ "eth.dst = "ETH_ADDR_FMT"; " -+ "ip6.dst = %s; " -+ "nd.target = %s; " -+ "output; " -+ "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s, -+ route->nexthop); -+ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200, -+ ds_cstr(match), ds_cstr(actions), -+ &route->header_); -+ } -+ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, -+ "eth.dst == 00:00:00:00:00:00 && ip4", -+ "arp { " -+ "eth.dst = ff:ff:ff:ff:ff:ff; " -+ "arp.spa = " REG_SRC_IPV4 "; " -+ "arp.tpa = " REG_NEXT_HOP_IPV4 "; " -+ "arp.op = 1; " /* ARP request */ -+ "output; " -+ "};"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, -+ "eth.dst == 00:00:00:00:00:00 && ip6", -+ "nd_ns { " -+ "nd.target = " REG_NEXT_HOP_IPV6 "; " -+ "output; " -+ "};"); -+ ovn_lflow_add(lflows, od, 
S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); - } - } +- } +-} ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ found_vip_network = true; ++ break; ++ } -/* Logical router ingress table IP_ROUTING : IP Routing. - * @@ -10277,41 +14829,38 @@ index 5a3227568..c81e3220c 100644 - * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address - * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and - * advances to the next table. -+/* Logical router egress table DELIVERY: Delivery (priority 100-110). - * +- * - * For ECMP routes, i.e. multiple routes with same policy and prefix, table - * IP_ROUTING remembers ECMP group id and selects a member id, and advances - * to table IP_ROUTING_ECMP, which sets outport, eth.src and - * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member. -+ * Priority 100 rules deliver packets to enabled logical ports. -+ * Priority 110 rules match multicast packets and update the source -+ * mac before delivering to enabled logical ports. IP multicast traffic -+ * bypasses S_ROUTER_IN_IP_ROUTING route lookups. - */ - static void +- */ +-static void -build_ip_routing_flows_for_lrouter_port( - struct ovn_port *op, struct hmap *lflows) -+build_egress_delivery_flows_for_lrouter_port( -+ struct ovn_port *op, struct hmap *lflows, -+ struct ds *match, struct ds *actions) - { - if (op->nbrp) { -+ if (!lrport_is_enabled(op->nbrp)) { -+ /* Drop packets to disabled logical ports (since logical flow -+ * tables are default-drop). */ -+ return; +-{ +- if (op->nbrp) { ++ if (found_vip_network) { ++ break; ++ } ++ } + } ++ } else if (lsp_is_router(op->nbsp)) { ++ /* This is a logical switch port that connects to a router. 
*/ - for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, - op->lrp_networks.ipv4_addrs[i].network_s, - op->lrp_networks.ipv4_addrs[i].plen, NULL, false, - &op->nbrp->header_); -+ if (op->derived) { -+ /* No egress packets should be processed in the context of -+ * a chassisredirect port. The chassisredirect port should -+ * be replaced by the l3dgw port in the local output -+ * pipeline stage before egress processing. */ ++ /* The peer of this switch port is the router port for which ++ * we need to add logical flows such that it can resolve ++ * ARP entries for all the other router ports connected to ++ * the switch in question. */ ++ ++ const char *peer_name = smap_get(&op->nbsp->options, ++ "router-port"); ++ if (!peer_name) { + return; } @@ -10320,95 +14869,27 @@ index 5a3227568..c81e3220c 100644 - op->lrp_networks.ipv6_addrs[i].network_s, - op->lrp_networks.ipv6_addrs[i].plen, NULL, false, - &op->nbrp->header_); -+ /* If multicast relay is enabled then also adjust source mac for IP -+ * multicast traffic. 
-+ */ -+ if (op->od->mcast_info.rtr.relay) { -+ ds_clear(match); -+ ds_clear(actions); -+ ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s", -+ op->json_key); -+ ds_put_format(actions, "eth.src = %s; output;", -+ op->lrp_networks.ea_s); -+ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110, -+ ds_cstr(match), ds_cstr(actions)); ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ return; } -+ -+ ds_clear(match); -+ ds_put_format(match, "outport == %s", op->json_key); -+ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, -+ ds_cstr(match), "output;"); -+ } -+ -+} -+ -+static void -+build_misc_local_traffic_drop_flows_for_lrouter( -+ struct ovn_datapath *od, struct hmap *lflows) -+{ -+ if (od->nbr) { -+ /* L3 admission control: drop multicast and broadcast source, localhost -+ * source or destination, and zero network source or destination -+ * (priority 100). */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, -+ "ip4.src_mcast ||" -+ "ip4.src == 255.255.255.255 || " -+ "ip4.src == 127.0.0.0/8 || " -+ "ip4.dst == 127.0.0.0/8 || " -+ "ip4.src == 0.0.0.0/8 || " -+ "ip4.dst == 0.0.0.0/8", -+ "drop;"); -+ -+ /* Drop ARP packets (priority 85). ARP request packets for router's own -+ * IPs are handled with priority-90 flows. -+ * Drop IPv6 ND packets (priority 85). ND NA packets for router's own -+ * IPs are handled with priority-90 flows. -+ */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, -+ "arp || nd", "drop;"); -+ -+ /* Allow IPv6 multicast traffic that's supposed to reach the -+ * router pipeline (e.g., router solicitations). -+ */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra", -+ "next;"); -+ -+ /* Drop other reserved multicast. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83, -+ "ip6.mcast_rsvd", "drop;"); -+ -+ /* Allow other multicast if relay enabled (priority 82). 
*/ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82, -+ "ip4.mcast || ip6.mcast", -+ od->mcast_info.rtr.relay ? "next;" : "drop;"); -+ -+ /* Drop Ethernet local broadcast. By definition this traffic should -+ * not be forwarded.*/ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, -+ "eth.bcast", "drop;"); -+ -+ /* TTL discard */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, -+ "ip4 && ip.ttl == {0, 1}", "drop;"); -+ -+ /* Pass other traffic not already handled to the next table for -+ * routing. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); - } - } +- } +-} - static void +-static void -build_static_route_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows, - struct hmap *ports) -+build_dhcpv6_reply_flows_for_lrouter_port( -+ struct ovn_port *op, struct hmap *lflows, -+ struct ds *match) - { +-{ - if (od->nbr) { - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150, - REG_ECMP_GROUP_ID" == 0", "next;"); -- ++ if (peer->od->nbr && ++ smap_get_bool(&peer->od->nbr->options, ++ "dynamic_neigh_routers", false)) { ++ return; ++ } + - struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups); - struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes); - struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes); @@ -10417,8 +14898,15 @@ index 5a3227568..c81e3220c 100644 - struct parsed_route *route = - parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]); - if (!route) { -- continue; -- } ++ for (size_t i = 0; i < op->od->n_router_ports; i++) { ++ const char *router_port_name = smap_get( ++ &op->od->router_ports[i]->nbsp->options, ++ "router-port"); ++ struct ovn_port *router_port = ovn_port_find(ports, ++ router_port_name); ++ if (!router_port || !router_port->nbrp) { + continue; + } - group = ecmp_groups_find(&ecmp_groups, route); - if (group) { - ecmp_groups_add_route(group, route); @@ -10433,7 +14921,11 @@ index 5a3227568..c81e3220c 100644 - } else { - 
unique_routes_add(&unique_routes, route); - } -- } ++ ++ /* Skip the router port under consideration. */ ++ if (router_port == peer) { ++ continue; + } - } - HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { - /* add a flow in IP_ROUTING, and one flow for each member in @@ -10443,44 +14935,28 @@ index 5a3227568..c81e3220c 100644 - const struct unique_routes_node *ur; - HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { - build_static_route_flow(lflows, od, ports, ur->route); -+ if (op->nbrp && (!op->derived)) { -+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { -+ ds_clear(match); -+ ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&" -+ " udp.dst == 546", -+ op->lrp_networks.ipv6_addrs[i].addr_s); -+ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, -+ ds_cstr(match), -+ "reg0 = 0; handle_dhcpv6_reply;"); - } +- } - ecmp_groups_destroy(&ecmp_groups); - unique_routes_destroy(&unique_routes); - parsed_routes_destroy(&parsed_routes); - } -+ - } +- } +-} -/* IP Multicast lookup. Here we set the output port, adjust TTL and - * advance to next table (priority 500). - */ - static void +-static void -build_mcast_lookup_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows, -+build_ipv6_input_flows_for_lrouter_port( -+ struct ovn_port *op, struct hmap *lflows, - struct ds *match, struct ds *actions) - { +- struct ds *match, struct ds *actions) +-{ - if (od->nbr) { -+ if (op->nbrp && (!op->derived)) { -+ /* No ingress packets are accepted on a chassisredirect -+ * port, so no need to program flows for that port. */ -+ if (op->lrp_networks.n_ipv6_addrs) { -+ /* ICMPv6 echo reply. These flows reply to echo requests -+ * received for the router's IP address. 
*/ -+ ds_clear(match); -+ ds_put_cstr(match, "ip6.dst == "); -+ op_put_v6_networks(match, op); -+ ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0"); ++ if (router_port->lrp_networks.n_ipv4_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == ", ++ peer->json_key); ++ op_put_v4_networks(match, router_port, false); - /* Drop IPv6 multicast traffic that shouldn't be forwarded, - * i.e., router solicitation and router advertisement. @@ -10489,24 +14965,26 @@ index 5a3227568..c81e3220c 100644 - "nd_rs || nd_ra", "drop;"); - if (!od->mcast_info.rtr.relay) { - return; -+ const char *lrp_actions = -+ "ip6.dst <-> ip6.src; " -+ "ip.ttl = 255; " -+ "icmp6.type = 129; " -+ "flags.loopback = 1; " -+ "next; "; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, -+ ds_cstr(match), lrp_actions, -+ &op->nbrp->header_); - } +- } ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ router_port->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbsp->header_); ++ } - struct ovn_igmp_group *igmp_group; -- ++ if (router_port->lrp_networks.n_ipv6_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV6 " == ", ++ peer->json_key); ++ op_put_v6_networks(match, router_port); + - LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { -+ /* ND reply. These flows reply to ND solicitations for the -+ * router's own IP address. 
*/ -+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - ds_clear(match); +- ds_clear(match); - ds_clear(actions); - if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { - ds_put_format(match, "ip4 && ip4.dst == %s ", @@ -10522,28 +15000,20 @@ index 5a3227568..c81e3220c 100644 - "ip.ttl--; " - "next; " - "};"); -+ if (op->od->l3dgw_port && op == op->od->l3dgw_port -+ && op->od->l3redirect_port) { -+ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s -+ * should only be sent from the gateway chassi, so that -+ * upstream MAC learning points to the gateway chassis. -+ * Also need to avoid generation of multiple ND replies -+ * from different chassis. */ -+ ds_put_format(match, "is_chassis_resident(%s)", -+ op->od->l3redirect_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ router_port->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbsp->header_); } - ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;", - igmp_group->mcgroup.name); - ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500, -- ds_cstr(match), ds_cstr(actions)); -+ -+ build_lrouter_nd_flow(op->od, op, "nd_na_router", -+ op->lrp_networks.ipv6_addrs[i].addr_s, -+ op->lrp_networks.ipv6_addrs[i].sn_addr_s, -+ REG_INPORT_ETH_ADDR, match, false, 90, -+ &op->nbrp->header_, lflows); - } - +- ds_cstr(match), ds_cstr(actions)); +- } +- - /* If needed, flood unregistered multicast on statically configured - * ports. Otherwise drop any multicast traffic. 
- */ @@ -10558,70 +15028,14 @@ index 5a3227568..c81e3220c 100644 - } else { - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, - "ip4.mcast || ip6.mcast", "drop;"); -+ /* UDP/TCP/SCTP port unreachable */ -+ if (!smap_get(&op->od->nbr->options, "chassis") -+ && !op->od->l3dgw_port) { -+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { -+ ds_clear(match); -+ ds_put_format(match, -+ "ip6 && ip6.dst == %s && !ip.later_frag && tcp", -+ op->lrp_networks.ipv6_addrs[i].addr_s); -+ const char *action = "tcp_reset {" -+ "eth.dst <-> eth.src; " -+ "ip6.dst <-> ip6.src; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 80, ds_cstr(match), action, -+ &op->nbrp->header_); -+ -+ ds_clear(match); -+ ds_put_format(match, -+ "ip6 && ip6.dst == %s && !ip.later_frag && sctp", -+ op->lrp_networks.ipv6_addrs[i].addr_s); -+ action = "sctp_abort {" -+ "eth.dst <-> eth.src; " -+ "ip6.dst <-> ip6.src; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 80, ds_cstr(match), action, -+ &op->nbrp->header_); -+ -+ ds_clear(match); -+ ds_put_format(match, -+ "ip6 && ip6.dst == %s && !ip.later_frag && udp", -+ op->lrp_networks.ipv6_addrs[i].addr_s); -+ action = "icmp6 {" -+ "eth.dst <-> eth.src; " -+ "ip6.dst <-> ip6.src; " -+ "ip.ttl = 255; " -+ "icmp6.type = 1; " -+ "icmp6.code = 4; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 80, ds_cstr(match), action, -+ &op->nbrp->header_); -+ -+ ds_clear(match); -+ ds_put_format(match, -+ "ip6 && ip6.dst == %s && !ip.later_frag", -+ op->lrp_networks.ipv6_addrs[i].addr_s); -+ action = "icmp6 {" -+ "eth.dst <-> eth.src; " -+ "ip6.dst <-> ip6.src; " -+ "ip.ttl = 255; " -+ "icmp6.type = 1; " -+ "icmp6.code = 3; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 70, ds_cstr(match), action, -+ &op->nbrp->header_); -+ } } -- } --} + } ++ + } -/* Logical router ingress table POLICY: Policy. 
-- * ++/* Local router ingress table CHK_PKT_LEN: Check packet length. + * - * A packet that arrives at this table is an IP packet that should be - * permitted/denied/rerouted to the address in the rule's nexthop. - * This table sets outport to the correct out_port, @@ -10629,116 +15043,186 @@ index 5a3227568..c81e3220c 100644 - * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address - * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and - * advances to the next table for ARP/ND resolution. */ --static void ++ * Any IPv4 packet with outport set to the distributed gateway ++ * router port, check the packet length and store the result in the ++ * 'REGBIT_PKT_LARGER' register bit. ++ * ++ * Local router ingress table LARGER_PKTS: Handle larger packets. ++ * ++ * Any IPv4 packet with outport set to the distributed gateway ++ * router port and the 'REGBIT_PKT_LARGER' register bit is set, ++ * generate ICMPv4 packet with type 3 (Destination Unreachable) and ++ * code 4 (Fragmentation needed). ++ * */ + static void -build_ingress_policy_flows_for_lrouter( -- struct ovn_datapath *od, struct hmap *lflows, ++build_check_pkt_len_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows, - struct hmap *ports) --{ -- if (od->nbr) { ++ struct hmap *ports, ++ struct ds *match, struct ds *actions) + { + if (od->nbr) { - /* This is a catch-all rule. It has the lowest priority (0) - * does a match-all("1") and pass-through (next) */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;"); -+ /* ICMPv6 time exceeded */ -+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { -+ /* skip link-local address */ -+ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { -+ continue; -+ } - /* Convert routing policies to flows. 
*/ - for (int i = 0; i < od->nbr->n_policies; i++) { - const struct nbrec_logical_router_policy *rule - = od->nbr->policies[i]; - build_routing_policy_flow(lflows, od, ports, rule, &rule->header_); ++ /* Packets are allowed by default. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1", ++ "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1", ++ "next;"); ++ ++ if (od->l3dgw_port && od->l3redirect_port) { ++ int gw_mtu = 0; ++ if (od->l3dgw_port->nbrp) { ++ gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options, ++ "gateway_mtu", 0); ++ } ++ /* Add the flows only if gateway_mtu is configured. */ ++ if (gw_mtu <= 0) { ++ return; ++ } ++ + ds_clear(match); -+ ds_clear(actions); ++ ds_put_format(match, "outport == %s", od->l3dgw_port->json_key); + -+ ds_put_format(match, -+ "inport == %s && ip6 && " -+ "ip6.src == %s/%d && " -+ "ip.ttl == {0, 1} && !ip.later_frag", -+ op->json_key, -+ op->lrp_networks.ipv6_addrs[i].network_s, -+ op->lrp_networks.ipv6_addrs[i].plen); ++ ds_clear(actions); + ds_put_format(actions, -+ "icmp6 {" -+ "eth.dst <-> eth.src; " -+ "ip6.dst = ip6.src; " -+ "ip6.src = %s; " -+ "ip.ttl = 255; " -+ "icmp6.type = 3; /* Time exceeded */ " -+ "icmp6.code = 0; /* TTL exceeded in transit */ " -+ "next; };", -+ op->lrp_networks.ipv6_addrs[i].addr_s); -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, ++ REGBIT_PKT_LARGER" = check_pkt_larger(%d);" ++ " next;", gw_mtu + VLAN_ETH_HEADER_LEN); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50, + ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); ++ &od->l3dgw_port->nbrp->header_); ++ ++ for (size_t i = 0; i < od->nbr->n_ports; i++) { ++ struct ovn_port *rp = ovn_port_find(ports, ++ od->nbr->ports[i]->name); ++ if (!rp || rp == od->l3dgw_port) { ++ continue; ++ } ++ ++ if (rp->lrp_networks.ipv4_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && outport == %s" ++ " && ip4 && "REGBIT_PKT_LARGER, ++ rp->json_key, 
od->l3dgw_port->json_key); ++ ++ ds_clear(actions); ++ /* Set icmp4.frag_mtu to gw_mtu */ ++ ds_put_format(actions, ++ "icmp4_error {" ++ REGBIT_EGRESS_LOOPBACK" = 1; " ++ "eth.dst = %s; " ++ "ip4.dst = ip4.src; " ++ "ip4.src = %s; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; /* Destination Unreachable. */ " ++ "icmp4.code = 4; /* Frag Needed and DF was Set. */ " ++ "icmp4.frag_mtu = %d; " ++ "next(pipeline=ingress, table=%d); };", ++ rp->lrp_networks.ea_s, ++ rp->lrp_networks.ipv4_addrs[0].addr_s, ++ gw_mtu, ++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_LARGER_PKTS, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &rp->nbrp->header_); ++ } ++ ++ if (rp->lrp_networks.ipv6_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && outport == %s" ++ " && ip6 && "REGBIT_PKT_LARGER, ++ rp->json_key, od->l3dgw_port->json_key); ++ ++ ds_clear(actions); ++ /* Set icmp6.frag_mtu to gw_mtu */ ++ ds_put_format(actions, ++ "icmp6_error {" ++ REGBIT_EGRESS_LOOPBACK" = 1; " ++ "eth.dst = %s; " ++ "ip6.dst = ip6.src; " ++ "ip6.src = %s; " ++ "ip.ttl = 255; " ++ "icmp6.type = 2; /* Packet Too Big. */ " ++ "icmp6.code = 0; " ++ "icmp6.frag_mtu = %d; " ++ "next(pipeline=ingress, table=%d); };", ++ rp->lrp_networks.ea_s, ++ rp->lrp_networks.ipv6_addrs[0].addr_s, ++ gw_mtu, ++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_LARGER_PKTS, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &rp->nbrp->header_); ++ } ++ } } } -+ } -/* Local router ingress table ARP_RESOLVE: ARP Resolution. */ ++/* Logical router ingress table GW_REDIRECT: Gateway redirect. ++ * ++ * For traffic with outport equal to the l3dgw_port ++ * on a distributed router, this table redirects a subset ++ * of the traffic to the l3redirect_port which represents ++ * the central instance of the l3dgw_port. 
++ */ static void -build_arp_resolve_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows) -+build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od, -+ struct hmap *lflows) ++build_gateway_redirect_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct ds *match, struct ds *actions) { if (od->nbr) { - /* Multicast packets already have the outport set so just advance to - * next table (priority 500). */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500, - "ip4.mcast || ip6.mcast", "next;"); ++ if (od->l3dgw_port && od->l3redirect_port) { ++ const struct ovsdb_idl_row *stage_hint = NULL; - ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", - "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;"); -+ /* Priority-90-92 flows handle ARP requests and ND packets. Most are -+ * per logical port but DNAT addresses can be handled per datapath -+ * for non gateway router ports. -+ * -+ * Priority 91 and 92 flows are added for each gateway router -+ * port to handle the special cases. In case we get the packet -+ * on a regular port, just reply with the port's ETH address. -+ */ -+ for (int i = 0; i < od->nbr->n_nat; i++) { -+ struct ovn_nat *nat_entry = &od->nat_entries[i]; ++ if (od->l3dgw_port->nbrp) { ++ stage_hint = &od->l3dgw_port->nbrp->header_; ++ } - ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", - "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;"); -+ /* Skip entries we failed to parse. */ -+ if (!nat_entry_is_valid(nat_entry)) { -+ continue; -+ } -+ -+ /* Skip SNAT entries for now, we handle unique SNAT IPs separately -+ * below. -+ */ -+ if (!strcmp(nat_entry->nb->type, "snat")) { -+ continue; -+ } -+ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); ++ /* For traffic with outport == l3dgw_port, if the ++ * packet did not match any higher priority redirect ++ * rule, then the traffic is redirected to the central ++ * instance of the l3dgw_port. 
*/ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s", ++ od->l3dgw_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, "outport = %s; next;", ++ od->l3redirect_port->json_key); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, ++ ds_cstr(match), ds_cstr(actions), ++ stage_hint); + } + -+ /* Now handle SNAT entries too, one per unique SNAT IP. */ -+ struct shash_node *snat_snode; -+ SHASH_FOR_EACH (snat_snode, &od->snat_ips) { -+ struct ovn_snat_ip *snat_ip = snat_snode->data; -+ -+ if (ovs_list_is_empty(&snat_ip->snat_entries)) { -+ continue; -+ } -+ -+ struct ovn_nat *nat_entry = -+ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), -+ struct ovn_nat, ext_addr_list_node); -+ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); -+ } ++ /* Packets are allowed by default. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); } } -/* Local router ingress table ARP_RESOLVE: ARP Resolution. -- * ++/* Local router ingress table ARP_REQUEST: ARP request. + * - * Any unicast packet that reaches this table is an IP packet whose - * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 - * (ip4.dst/ipv6.dst is the final destination). @@ -10746,33 +15230,23 @@ index 5a3227568..c81e3220c 100644 - * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and - * an Ethernet address in eth.dst. - */ -+/* Logical router ingress table 3: IP Input for IPv4. */ ++ * In the common case where the Ethernet destination has been resolved, ++ * this table outputs the packet (priority 0). Otherwise, it composes ++ * and sends an ARP/IPv6 NA request (priority 100). 
*/ static void -build_arp_resolve_flows_for_lrouter_port( - struct ovn_port *op, struct hmap *lflows, - struct hmap *ports, -- struct ds *match, struct ds *actions) -+build_lrouter_ipv4_ip_input(struct ovn_port *op, -+ struct hmap *lflows, -+ struct ds *match, struct ds *actions) ++build_arp_request_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, + struct ds *match, struct ds *actions) { - if (op->nbsp && !lsp_is_enabled(op->nbsp)) { - return; - } -+ /* No ingress packets are accepted on a chassisredirect -+ * port, so no need to program flows for that port. */ -+ if (op->nbrp && (!op->derived)) { -+ if (op->lrp_networks.n_ipv4_addrs) { -+ /* L3 admission control: drop packets that originate from an -+ * IPv4 address owned by the router or a broadcast address -+ * known to the router (priority 100). */ -+ ds_clear(match); -+ ds_put_cstr(match, "ip4.src == "); -+ op_put_v4_networks(match, op, true); -+ ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, -+ ds_cstr(match), "drop;", -+ &op->nbrp->header_); ++ if (od->nbr) { ++ for (int i = 0; i < od->nbr->n_static_routes; i++) { ++ const struct nbrec_logical_router_static_route *route; - if (op->nbrp) { - /* This is a logical router port. If next-hop IP address in @@ -10790,15 +15264,14 @@ index 5a3227568..c81e3220c 100644 - REG_NEXT_HOP_IPV4 "== ", - op->peer->json_key); - op_put_v4_networks(match, op, false); -+ /* ICMP echo reply. These flows reply to ICMP echo requests -+ * received for the router's IP address. Since packets only -+ * get here as part of the logical router datapath, the inport -+ * (i.e. the incoming locally attached net) does not matter. 
-+ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ -+ ds_clear(match); -+ ds_put_cstr(match, "ip4.dst == "); -+ op_put_v4_networks(match, op, false); -+ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0"); ++ route = od->nbr->static_routes[i]; ++ struct in6_addr gw_ip6; ++ unsigned int plen; ++ char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen); ++ if (error || plen != 128) { ++ free(error); ++ continue; ++ } - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", @@ -10807,25 +15280,244 @@ index 5a3227568..c81e3220c 100644 - S_ROUTER_IN_ARP_RESOLVE, 100, - ds_cstr(match), ds_cstr(actions), - &op->nbrp->header_); -- } -+ const char * icmp_actions = "ip4.dst <-> ip4.src; " -+ "ip.ttl = 255; " -+ "icmp4.type = 0; " -+ "flags.loopback = 1; " -+ "next; "; ++ ds_clear(match); ++ ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && " ++ "ip6 && " REG_NEXT_HOP_IPV6 " == %s", ++ route->nexthop); ++ struct in6_addr sn_addr; ++ struct eth_addr eth_dst; ++ in6_addr_solicited_node(&sn_addr, &gw_ip6); ++ ipv6_multicast_to_ethernet(ð_dst, &sn_addr); ++ ++ char sn_addr_s[INET6_ADDRSTRLEN + 1]; ++ ipv6_string_mapped(sn_addr_s, &sn_addr); ++ ++ ds_clear(actions); ++ ds_put_format(actions, ++ "nd_ns { " ++ "eth.dst = "ETH_ADDR_FMT"; " ++ "ip6.dst = %s; " ++ "nd.target = %s; " ++ "output; " ++ "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s, ++ route->nexthop); ++ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200, ++ ds_cstr(match), ds_cstr(actions), ++ &route->header_); ++ } ++ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, ++ "eth.dst == 00:00:00:00:00:00 && ip4", ++ "arp { " ++ "eth.dst = ff:ff:ff:ff:ff:ff; " ++ "arp.spa = " REG_SRC_IPV4 "; " ++ "arp.tpa = " REG_NEXT_HOP_IPV4 "; " ++ "arp.op = 1; " /* ARP request */ ++ "output; " ++ "};"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, ++ "eth.dst == 00:00:00:00:00:00 && ip6", ++ "nd_ns { " ++ "nd.target = " REG_NEXT_HOP_IPV6 "; " ++ "output; " ++ 
"};"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); ++ } ++} ++ ++/* Logical router egress table DELIVERY: Delivery (priority 100-110). ++ * ++ * Priority 100 rules deliver packets to enabled logical ports. ++ * Priority 110 rules match multicast packets and update the source ++ * mac before delivering to enabled logical ports. IP multicast traffic ++ * bypasses S_ROUTER_IN_IP_ROUTING route lookups. ++ */ ++static void ++build_egress_delivery_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (op->nbrp) { ++ if (!lrport_is_enabled(op->nbrp)) { ++ /* Drop packets to disabled logical ports (since logical flow ++ * tables are default-drop). */ ++ return; ++ } ++ ++ if (op->derived) { ++ /* No egress packets should be processed in the context of ++ * a chassisredirect port. The chassisredirect port should ++ * be replaced by the l3dgw port in the local output ++ * pipeline stage before egress processing. */ ++ return; ++ } ++ ++ /* If multicast relay is enabled then also adjust source mac for IP ++ * multicast traffic. ++ */ ++ if (op->od->mcast_info.rtr.relay) { ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s", ++ op->json_key); ++ ds_put_format(actions, "eth.src = %s; output;", ++ op->lrp_networks.ea_s); ++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110, ++ ds_cstr(match), ds_cstr(actions)); ++ } ++ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s", op->json_key); ++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, ++ ds_cstr(match), "output;"); ++ } ++ ++} ++ ++static void ++build_misc_local_traffic_drop_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows) ++{ ++ if (od->nbr) { ++ /* L3 admission control: drop multicast and broadcast source, localhost ++ * source or destination, and zero network source or destination ++ * (priority 100). 
*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, ++ "ip4.src_mcast ||" ++ "ip4.src == 255.255.255.255 || " ++ "ip4.src == 127.0.0.0/8 || " ++ "ip4.dst == 127.0.0.0/8 || " ++ "ip4.src == 0.0.0.0/8 || " ++ "ip4.dst == 0.0.0.0/8", ++ "drop;"); ++ ++ /* Drop ARP packets (priority 85). ARP request packets for router's own ++ * IPs are handled with priority-90 flows. ++ * Drop IPv6 ND packets (priority 85). ND NA packets for router's own ++ * IPs are handled with priority-90 flows. ++ */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, ++ "arp || nd", "drop;"); ++ ++ /* Allow IPv6 multicast traffic that's supposed to reach the ++ * router pipeline (e.g., router solicitations). ++ */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra", ++ "next;"); ++ ++ /* Drop other reserved multicast. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83, ++ "ip6.mcast_rsvd", "drop;"); ++ ++ /* Allow other multicast if relay enabled (priority 82). */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82, ++ "ip4.mcast || ip6.mcast", ++ od->mcast_info.rtr.relay ? "next;" : "drop;"); ++ ++ /* Drop Ethernet local broadcast. By definition this traffic should ++ * not be forwarded.*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, ++ "eth.bcast", "drop;"); ++ ++ /* TTL discard */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, ++ "ip4 && ip.ttl == {0, 1}", "drop;"); ++ ++ /* Pass other traffic not already handled to the next table for ++ * routing. 
*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); ++ } ++} ++ ++static void ++build_dhcpv6_reply_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match) ++{ ++ if (op->nbrp && (!op->derived)) { ++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&" ++ " udp.dst == 546", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ++ ds_cstr(match), ++ "reg0 = 0; handle_dhcpv6_reply;"); ++ } ++ } ++ ++} ++ ++static void ++build_ipv6_input_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (op->nbrp && (!op->derived)) { ++ /* No ingress packets are accepted on a chassisredirect ++ * port, so no need to program flows for that port. */ ++ if (op->lrp_networks.n_ipv6_addrs) { ++ /* ICMPv6 echo reply. These flows reply to echo requests ++ * received for the router's IP address. */ ++ ds_clear(match); ++ ds_put_cstr(match, "ip6.dst == "); ++ op_put_v6_networks(match, op); ++ ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0"); ++ ++ const char *lrp_actions = ++ "ip6.dst <-> ip6.src; " ++ "ip.ttl = 255; " ++ "icmp6.type = 129; " ++ "flags.loopback = 1; " ++ "next; "; + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, -+ ds_cstr(match), icmp_actions, ++ ds_cstr(match), lrp_actions, + &op->nbrp->header_); + } ++ ++ /* ND reply. These flows reply to ND solicitations for the ++ * router's own IP address. */ ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ ds_clear(match); ++ if (op->od->l3dgw_port && op == op->od->l3dgw_port ++ && op->od->l3redirect_port) { ++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s ++ * should only be sent from the gateway chassi, so that ++ * upstream MAC learning points to the gateway chassis. 
++ * Also need to avoid generation of multiple ND replies ++ * from different chassis. */ ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } - if (op->lrp_networks.n_ipv6_addrs) { -- ds_clear(match); ++ build_lrouter_nd_flow(op->od, op, "nd_na_router", ++ op->lrp_networks.ipv6_addrs[i].addr_s, ++ op->lrp_networks.ipv6_addrs[i].sn_addr_s, ++ REG_INPORT_ETH_ADDR, match, false, 90, ++ &op->nbrp->header_, lflows); ++ } ++ ++ /* UDP/TCP/SCTP port unreachable */ ++ if (!smap_get(&op->od->nbr->options, "chassis") ++ && !op->od->l3dgw_port) { ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { + ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV6 " == ", - op->peer->json_key); - op_put_v6_networks(match, op); -+ /* BFD msg handling */ -+ build_lrouter_bfd_flows(lflows, op); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag && tcp", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ const char *action = "tcp_reset {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", @@ -10833,30 +15525,19 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add_with_hint(lflows, op->peer->od, - S_ROUTER_IN_ARP_RESOLVE, 100, - ds_cstr(match), ds_cstr(actions), -- &op->nbrp->header_); ++ ds_clear(match); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag && sctp", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ action = "sctp_abort {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, + &op->nbrp->header_); - } -+ /* ICMP time exceeded */ -+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { -+ ds_clear(match); -+ ds_clear(actions); -+ -+ ds_put_format(match, -+ "inport 
== %s && ip4 && " -+ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key); -+ ds_put_format(actions, -+ "icmp4 {" -+ "eth.dst <-> eth.src; " -+ "icmp4.type = 11; /* Time exceeded */ " -+ "icmp4.code = 0; /* TTL exceeded in transit */ " -+ "ip4.dst = ip4.src; " -+ "ip4.src = %s; " -+ "ip.ttl = 255; " -+ "next; };", -+ op->lrp_networks.ipv4_addrs[i].addr_s); -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, -+ ds_cstr(match), ds_cstr(actions), -+ &op->nbrp->header_); - } +- } - if (!op->derived && op->od->l3redirect_port) { - const char *redirect_type = smap_get(&op->nbrp->options, @@ -10868,50 +15549,46 @@ index 5a3227568..c81e3220c 100644 - * is "bridged", instead of calling "get_arp" - * on this node, we will redirect the packet to gateway - * chassis, by setting destination mac router port mac.*/ -- ds_clear(match); + ds_clear(match); - ds_put_format(match, "outport == %s && " - "!is_chassis_resident(%s)", op->json_key, - op->od->l3redirect_port->json_key); - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", - op->lrp_networks.ea_s); -+ /* ARP reply. These flows reply to ARP requests for the router's own -+ * IP address. 
*/ -+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { -+ ds_clear(match); -+ ds_put_format(match, "arp.spa == %s/%u", -+ op->lrp_networks.ipv4_addrs[i].network_s, -+ op->lrp_networks.ipv4_addrs[i].plen); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag && udp", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ action = "icmp6 {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "ip.ttl = 255; " ++ "icmp6.type = 1; " ++ "icmp6.code = 4; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); - ovn_lflow_add_with_hint(lflows, op->od, - S_ROUTER_IN_ARP_RESOLVE, 50, - ds_cstr(match), ds_cstr(actions), -- &op->nbrp->header_); -- } -- } -+ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer -+ && op->peer->od->n_localnet_ports) { -+ bool add_chassis_resident_check = false; -+ if (op == op->od->l3dgw_port) { -+ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s -+ * should only be sent from the gateway chassis, so that -+ * upstream MAC learning points to the gateway chassis. -+ * Also need to avoid generation of multiple ARP responses -+ * from different chassis. */ -+ add_chassis_resident_check = true; -+ } else { -+ /* Check if the option 'reside-on-redirect-chassis' -+ * is set to true on the router port. If set to true -+ * and if peer's logical switch has a localnet port, it -+ * means the router pipeline for the packets from -+ * peer's logical switch is be run on the chassis -+ * hosting the gateway port and it should reply to the -+ * ARP requests for the router port IPs. 
-+ */ -+ add_chassis_resident_check = smap_get_bool( -+ &op->nbrp->options, -+ "reside-on-redirect-chassis", false); -+ } ++ ds_clear(match); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ action = "icmp6 {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "ip.ttl = 255; " ++ "icmp6.type = 1; " ++ "icmp6.code = 3; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 70, ds_cstr(match), action, + &op->nbrp->header_); + } + } - /* Drop IP traffic destined to router owned IPs. Part of it is dropped - * in stage "lr_in_ip_input" but traffic that could have been unSNATed @@ -10928,10 +15605,11 @@ index 5a3227568..c81e3220c 100644 - * the router ports attached to the switch (to which this port - * connects) and if the address in question is reachable from the - * router port, add an ARP/ND entry in that router's pipeline. */ -+ if (add_chassis_resident_check) { -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ op->od->l3redirect_port->json_key); -+ } ++ /* ICMPv6 time exceeded */ ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ /* skip link-local address */ ++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { ++ continue; + } - for (size_t i = 0; i < op->n_lsp_addrs; i++) { @@ -10948,40 +15626,61 @@ index 5a3227568..c81e3220c 100644 - if (!peer_name) { - continue; - } -+ build_lrouter_arp_flow(op->od, op, -+ op->lrp_networks.ipv4_addrs[i].addr_s, -+ REG_INPORT_ETH_ADDR, match, false, 90, -+ &op->nbrp->header_, lflows); ++ ds_clear(match); ++ ds_clear(actions); ++ ++ ds_put_format(match, ++ "inport == %s && ip6 && " ++ "ip6.src == %s/%d && " ++ "ip.ttl == {0, 1} && !ip.later_frag", ++ op->json_key, ++ op->lrp_networks.ipv6_addrs[i].network_s, ++ op->lrp_networks.ipv6_addrs[i].plen); ++ ds_put_format(actions, ++ "icmp6 {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst = ip6.src; " ++ "ip6.src = %s; " ++ "ip.ttl = 255; " ++ "icmp6.type = 3; /* 
Time exceeded */ " ++ "icmp6.code = 0; /* TTL exceeded in transit */ " ++ "next; };", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); + } ++ } - struct ovn_port *peer = ovn_port_find(ports, peer_name); - if (!peer || !peer->nbrp) { - continue; - } -+ /* A set to hold all load-balancer vips that need ARP responses. */ -+ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); -+ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); -+ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); ++} - if (!find_lrp_member_ip(peer, ip_s)) { - continue; - } -+ const char *ip_address; -+ SSET_FOR_EACH (ip_address, &all_ips_v4) { -+ ds_clear(match); -+ if (op == op->od->l3dgw_port) { -+ ds_put_format(match, "is_chassis_resident(%s)", -+ op->od->l3redirect_port->json_key); -+ } ++static void ++build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbr) { - ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV4 " == %s", - peer->json_key, ip_s); -+ build_lrouter_arp_flow(op->od, op, -+ ip_address, REG_INPORT_ETH_ADDR, -+ match, false, 90, NULL, lflows); -+ } ++ /* Priority-90-92 flows handle ARP requests and ND packets. Most are ++ * per logical port but DNAT addresses can be handled per datapath ++ * for non gateway router ports. ++ * ++ * Priority 91 and 92 flows are added for each gateway router ++ * port to handle the special cases. In case we get the packet ++ * on a regular port, just reply with the port's ETH address. 
++ */ ++ for (int i = 0; i < od->nbr->n_nat; i++) { ++ struct ovn_nat *nat_entry = &od->nat_entries[i]; - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", ea_s); @@ -10991,11 +15690,9 @@ index 5a3227568..c81e3220c 100644 - ds_cstr(actions), - &op->nbsp->header_); - } -+ SSET_FOR_EACH (ip_address, &all_ips_v6) { -+ ds_clear(match); -+ if (op == op->od->l3dgw_port) { -+ ds_put_format(match, "is_chassis_resident(%s)", -+ op->od->l3redirect_port->json_key); ++ /* Skip entries we failed to parse. */ ++ if (!nat_entry_is_valid(nat_entry)) { ++ continue; } - for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { @@ -11010,55 +15707,32 @@ index 5a3227568..c81e3220c 100644 - if (!peer_name) { - continue; - } -+ build_lrouter_nd_flow(op->od, op, "nd_na", -+ ip_address, NULL, REG_INPORT_ETH_ADDR, -+ match, false, 90, NULL, lflows); -+ } - +- - struct ovn_port *peer = ovn_port_find(ports, peer_name); - if (!peer || !peer->nbrp) { - continue; - } -+ sset_destroy(&all_ips_v4); -+ sset_destroy(&all_ips_v6); - +- - if (!find_lrp_member_ip(peer, ip_s)) { - continue; - } -+ if (!smap_get(&op->od->nbr->options, "chassis") -+ && !op->od->l3dgw_port) { -+ /* UDP/TCP/SCTP port unreachable. */ -+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { -+ ds_clear(match); -+ ds_put_format(match, -+ "ip4 && ip4.dst == %s && !ip.later_frag && udp", -+ op->lrp_networks.ipv4_addrs[i].addr_s); -+ const char *action = "icmp4 {" -+ "eth.dst <-> eth.src; " -+ "ip4.dst <-> ip4.src; " -+ "ip.ttl = 255; " -+ "icmp4.type = 3; " -+ "icmp4.code = 3; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 80, ds_cstr(match), action, -+ &op->nbrp->header_); ++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately ++ * below. 
++ */ ++ if (!strcmp(nat_entry->nb->type, "snat")) { ++ continue; ++ } ++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); ++ } - ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV6 " == %s", - peer->json_key, ip_s); -+ ds_clear(match); -+ ds_put_format(match, -+ "ip4 && ip4.dst == %s && !ip.later_frag && tcp", -+ op->lrp_networks.ipv4_addrs[i].addr_s); -+ action = "tcp_reset {" -+ "eth.dst <-> eth.src; " -+ "ip4.dst <-> ip4.src; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 80, ds_cstr(match), action, -+ &op->nbrp->header_); ++ /* Now handle SNAT entries too, one per unique SNAT IP. */ ++ struct shash_node *snat_snode; ++ SHASH_FOR_EACH (snat_snode, &od->snat_ips) { ++ struct ovn_snat_ip *snat_ip = snat_snode->data; - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", ea_s); @@ -11068,34 +15742,10 @@ index 5a3227568..c81e3220c 100644 - ds_cstr(actions), - &op->nbsp->header_); - } -+ ds_clear(match); -+ ds_put_format(match, -+ "ip4 && ip4.dst == %s && !ip.later_frag && sctp", -+ op->lrp_networks.ipv4_addrs[i].addr_s); -+ action = "sctp_abort {" -+ "eth.dst <-> eth.src; " -+ "ip4.dst <-> ip4.src; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 80, ds_cstr(match), action, -+ &op->nbrp->header_); -+ -+ ds_clear(match); -+ ds_put_format(match, -+ "ip4 && ip4.dst == %s && !ip.later_frag", -+ op->lrp_networks.ipv4_addrs[i].addr_s); -+ action = "icmp4 {" -+ "eth.dst <-> eth.src; " -+ "ip4.dst <-> ip4.src; " -+ "ip.ttl = 255; " -+ "icmp4.type = 3; " -+ "icmp4.code = 2; " -+ "next; };"; -+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, -+ 70, ds_cstr(match), action, -+ &op->nbrp->header_); ++ if (ovs_list_is_empty(&snat_ip->snat_entries)) { ++ continue; } - } +- } - } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) - && !strcmp(op->nbsp->type, "virtual")) { - /* This is a virtual port. 
Add ARP replies for the virtual ip with @@ -11113,31 +15763,14 @@ index 5a3227568..c81e3220c 100644 - "virtual-parents"); - if (!vip || !virtual_parents || - !ip_parse(vip, &ip) || !op->sb) { -+ /* Drop IP traffic destined to router owned IPs except if the IP is -+ * also a SNAT IP. Those are dropped later, in stage -+ * "lr_in_arp_resolve", if unSNAT was unsuccessful. -+ * -+ * If op->pd->lb_force_snat_router_ip is true, it means the IP of the -+ * router port is also SNAT IP. -+ * -+ * Priority 60. -+ */ -+ if (!op->od->lb_force_snat_router_ip) { -+ build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, -+ lflows); -+ } -+ /* ARP / ND handling for external IP addresses. -+ * -+ * DNAT and SNAT IP addresses are external IP addresses that need ARP -+ * handling. -+ * -+ * These are already taken care globally, per router. The only -+ * exception is on the l3dgw_port where we might need to use a -+ * different ETH address. -+ */ -+ if (op != op->od->l3dgw_port) { - return; +- return; ++ struct ovn_nat *nat_entry = ++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), ++ struct ovn_nat, ext_addr_list_node); ++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); } ++ } ++} - if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] || - !op->sb->chassis) { @@ -11154,14 +15787,41 @@ index 5a3227568..c81e3220c 100644 - if (!peer || !peer->nbrp) { - continue; - } -- ++/* Logical router ingress table 3: IP Input for IPv4. */ ++static void ++build_lrouter_ipv4_ip_input(struct ovn_port *op, ++ struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ /* No ingress packets are accepted on a chassisredirect ++ * port, so no need to program flows for that port. */ ++ if (op->nbrp && (!op->derived)) { ++ if (op->lrp_networks.n_ipv4_addrs) { ++ /* L3 admission control: drop packets that originate from an ++ * IPv4 address owned by the router or a broadcast address ++ * known to the router (priority 100). 
*/ ++ ds_clear(match); ++ ds_put_cstr(match, "ip4.src == "); ++ op_put_v4_networks(match, op, true); ++ ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ++ ds_cstr(match), "drop;", ++ &op->nbrp->header_); + - if (find_lrp_member_ip(peer, vip)) { - ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV4 " == %s", - peer->json_key, vip); -+ for (size_t i = 0; i < op->od->nbr->n_nat; i++) { -+ struct ovn_nat *nat_entry = &op->od->nat_entries[i]; ++ /* ICMP echo reply. These flows reply to ICMP echo requests ++ * received for the router's IP address. Since packets only ++ * get here as part of the logical router datapath, the inport ++ * (i.e. the incoming locally attached net) does not matter. ++ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ ++ ds_clear(match); ++ ds_put_cstr(match, "ip4.dst == "); ++ op_put_v4_networks(match, op, false); ++ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0"); - const char *arp_actions = - "eth.dst = 00:00:00:00:00:00; next;"; @@ -11178,10 +15838,16 @@ index 5a3227568..c81e3220c 100644 - ovn_port_find(ports, op->sb->virtual_parent); - if (!vp || !vp->nbsp) { - return; -+ /* Skip entries we failed to parse. */ -+ if (!nat_entry_is_valid(nat_entry)) { -+ continue; - } +- } ++ const char * icmp_actions = "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 0; " ++ "flags.loopback = 1; " ++ "next; "; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, ++ ds_cstr(match), icmp_actions, ++ &op->nbrp->header_); ++ } - for (size_t i = 0; i < vp->n_lsp_addrs; i++) { - bool found_vip_network = false; @@ -11196,43 +15862,51 @@ index 5a3227568..c81e3220c 100644 - if (!peer_name) { - continue; - } -+ /* Skip SNAT entries for now, we handle unique SNAT IPs separately -+ * below. 
-+ */ -+ if (!strcmp(nat_entry->nb->type, "snat")) { -+ continue; -+ } -+ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); -+ } ++ /* BFD msg handling */ ++ build_lrouter_bfd_flows(lflows, op); - struct ovn_port *peer = - ovn_port_find(ports, peer_name); - if (!peer || !peer->nbrp) { - continue; - } -+ /* Now handle SNAT entries too, one per unique SNAT IP. */ -+ struct shash_node *snat_snode; -+ SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { -+ struct ovn_snat_ip *snat_ip = snat_snode->data; ++ /* ICMP time exceeded */ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_clear(actions); - if (!find_lrp_member_ip(peer, vip)) { - continue; - } -+ if (ovs_list_is_empty(&snat_ip->snat_entries)) { -+ continue; -+ } ++ ds_put_format(match, ++ "inport == %s && ip4 && " ++ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key); ++ ds_put_format(actions, ++ "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "icmp4.type = 11; /* Time exceeded */ " ++ "icmp4.code = 0; /* TTL exceeded in transit */ " ++ "ip4.dst = ip4.src; " ++ "ip4.src = %s; " ++ "ip.ttl = 255; " ++ "next; };", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } - ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV4 " == %s", - peer->json_key, vip); -+ struct ovn_nat *nat_entry = -+ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), -+ struct ovn_nat, ext_addr_list_node); -+ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); -+ } -+ } -+} ++ /* ARP reply. These flows reply to ARP requests for the router's own ++ * IP address. 
*/ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, "arp.spa == %s/%u", ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen); - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", ea_s); @@ -11243,67 +15917,88 @@ index 5a3227568..c81e3220c 100644 - &op->nbsp->header_); - found_vip_network = true; - break; -- } -+/* NAT, Defrag and load balancing. */ -+static void -+build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, -+ struct hmap *lflows, -+ struct shash *meter_groups, -+ struct hmap *lbs, -+ struct ds *match, struct ds *actions) -+{ -+ if (od->nbr) { ++ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer ++ && op->peer->od->n_localnet_ports) { ++ bool add_chassis_resident_check = false; ++ if (op == op->od->l3dgw_port) { ++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s ++ * should only be sent from the gateway chassis, so that ++ * upstream MAC learning points to the gateway chassis. ++ * Also need to avoid generation of multiple ARP responses ++ * from different chassis. */ ++ add_chassis_resident_check = true; ++ } else { ++ /* Check if the option 'reside-on-redirect-chassis' ++ * is set to true on the router port. If set to true ++ * and if peer's logical switch has a localnet port, it ++ * means the router pipeline for the packets from ++ * peer's logical switch is be run on the chassis ++ * hosting the gateway port and it should reply to the ++ * ARP requests for the router port IPs. 
++ */ ++ add_chassis_resident_check = smap_get_bool( ++ &op->nbrp->options, ++ "reside-on-redirect-chassis", false); + } - if (found_vip_network) { - break; -- } -- } -- } ++ if (add_chassis_resident_check) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } + } ++ ++ build_lrouter_arp_flow(op->od, op, ++ op->lrp_networks.ipv4_addrs[i].addr_s, ++ REG_INPORT_ETH_ADDR, match, false, 90, ++ &op->nbrp->header_, lflows); + } - } else if (lsp_is_router(op->nbsp)) { - /* This is a logical switch port that connects to a router. */ -+ /* Packets are allowed by default. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); - /* The peer of this switch port is the router port for which - * we need to add logical flows such that it can resolve - * ARP entries for all the other router ports connected to - * the switch in question. */ -+ /* Send the IPv6 NS packets to next table. When ovn-controller -+ * generates IPv6 NS (for the action - nd_ns{}), the injected -+ * packet would go through conntrack - which is not required. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); ++ /* A set to hold all load-balancer vips that need ARP responses. 
*/ ++ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); ++ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); ++ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); - const char *peer_name = smap_get(&op->nbsp->options, - "router-port"); - if (!peer_name) { -+ /* NAT rules are only valid on Gateway routers and routers with -+ * l3dgw_port (router has a port with gateway chassis -+ * specified). */ -+ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { - return; - } +- return; +- } ++ const char *ip_address; ++ if (sset_count(&all_ips_v4)) { ++ ds_clear(match); ++ if (op == op->od->l3dgw_port) { ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); ++ } - struct ovn_port *peer = ovn_port_find(ports, peer_name); - if (!peer || !peer->nbrp) { - return; - } -+ struct sset nat_entries = SSET_INITIALIZER(&nat_entries); ++ struct ds load_balancer_ips_v4 = DS_EMPTY_INITIALIZER; - if (peer->od->nbr && - smap_get_bool(&peer->od->nbr->options, - "dynamic_neigh_routers", false)) { - return; -- } -+ bool dnat_force_snat_ip = -+ !lport_addresses_is_empty(&od->dnat_force_snat_addrs); -+ bool lb_force_snat_ip = -+ !lport_addresses_is_empty(&od->lb_force_snat_addrs); ++ /* For IPv4 we can just create one rule with all required IPs. 
*/ ++ ds_put_cstr(&load_balancer_ips_v4, "{ "); ++ ds_put_and_free_cstr(&load_balancer_ips_v4, ++ sset_join(&all_ips_v4, ", ", " }")); ++ ++ build_lrouter_arp_flow(op->od, op, ds_cstr(&load_balancer_ips_v4), ++ REG_INPORT_ETH_ADDR, ++ match, false, 90, NULL, lflows); ++ ds_destroy(&load_balancer_ips_v4); + } - for (size_t i = 0; i < op->od->n_router_ports; i++) { - const char *router_port_name = smap_get( @@ -11312,57 +16007,50 @@ index 5a3227568..c81e3220c 100644 - struct ovn_port *router_port = ovn_port_find(ports, - router_port_name); - if (!router_port || !router_port->nbrp) { -+ for (int i = 0; i < od->nbr->n_nat; i++) { -+ const struct nbrec_nat *nat; -+ -+ nat = od->nbr->nat[i]; -+ -+ ovs_be32 ip, mask; -+ struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; -+ bool is_v6 = false; -+ bool stateless = lrouter_nat_is_stateless(nat); -+ struct nbrec_address_set *allowed_ext_ips = -+ nat->allowed_ext_ips; -+ struct nbrec_address_set *exempted_ext_ips = -+ nat->exempted_ext_ips; -+ -+ if (allowed_ext_ips && exempted_ext_ips) { -+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); -+ VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since " -+ "both allowed and exempt external ips set", -+ UUID_ARGS(&(nat->header_.uuid))); - continue; +- continue; ++ SSET_FOR_EACH (ip_address, &all_ips_v6) { ++ ds_clear(match); ++ if (op == op->od->l3dgw_port) { ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); } - /* Skip the router port under consideration. 
*/ - if (router_port == peer) { - continue; -+ char *error = ip_parse_masked(nat->external_ip, &ip, &mask); -+ if (error || mask != OVS_BE32_MAX) { -+ free(error); -+ error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); -+ if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) { -+ /* Invalid for both IPv4 and IPv6 */ -+ static struct vlog_rate_limit rl = -+ VLOG_RATE_LIMIT_INIT(5, 1); -+ VLOG_WARN_RL(&rl, "bad external ip %s for nat", -+ nat->external_ip); -+ free(error); -+ continue; -+ } -+ /* It was an invalid IPv4 address, but valid IPv6. -+ * Treat the rest of the handling of this NAT rule -+ * as IPv6. */ -+ is_v6 = true; - } +- } ++ build_lrouter_nd_flow(op->od, op, "nd_na", ++ ip_address, NULL, REG_INPORT_ETH_ADDR, ++ match, false, 90, NULL, lflows); ++ } - if (router_port->lrp_networks.n_ipv4_addrs) { -- ds_clear(match); ++ sset_destroy(&all_ips_v4); ++ sset_destroy(&all_ips_v6); ++ ++ if (!smap_get(&op->od->nbr->options, "chassis") ++ && !op->od->l3dgw_port) { ++ /* UDP/TCP/SCTP port unreachable. */ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { + ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV4 " == ", - peer->json_key); - op_put_v4_networks(match, router_port, false); -- ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && udp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ const char *action = "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; " ++ "icmp4.code = 3; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); + - ds_clear(actions); - ds_put_format(actions, "eth.dst = %s; next;", - router_port->lrp_networks.ea_s); @@ -11370,48 +16058,36 @@ index 5a3227568..c81e3220c 100644 - S_ROUTER_IN_ARP_RESOLVE, 100, - ds_cstr(match), ds_cstr(actions), - &op->nbsp->header_); -+ /* Check the validity of nat->logical_ip. 
'logical_ip' can -+ * be a subnet when the type is "snat". */ -+ int cidr_bits; -+ if (is_v6) { -+ error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6); -+ cidr_bits = ipv6_count_cidr_bits(&mask_v6); -+ } else { -+ error = ip_parse_masked(nat->logical_ip, &ip, &mask); -+ cidr_bits = ip_count_cidr_bits(mask); -+ } -+ if (!strcmp(nat->type, "snat")) { -+ if (error) { -+ /* Invalid for both IPv4 and IPv6 */ -+ static struct vlog_rate_limit rl = -+ VLOG_RATE_LIMIT_INIT(5, 1); -+ VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " -+ "in router "UUID_FMT"", -+ nat->logical_ip, UUID_ARGS(&od->key)); -+ free(error); -+ continue; -+ } -+ } else { -+ if (error || (!is_v6 && mask != OVS_BE32_MAX) -+ || (is_v6 && memcmp(&mask_v6, &v6_exact, -+ sizeof mask_v6))) { -+ /* Invalid for both IPv4 and IPv6 */ -+ static struct vlog_rate_limit rl = -+ VLOG_RATE_LIMIT_INIT(5, 1); -+ VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " -+ ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); -+ free(error); -+ continue; -+ } - } +- } ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && tcp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "tcp_reset {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); - if (router_port->lrp_networks.n_ipv6_addrs) { -- ds_clear(match); + ds_clear(match); - ds_put_format(match, "outport == %s && " - REG_NEXT_HOP_IPV6 " == ", - peer->json_key); - op_put_v6_networks(match, router_port); -- ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && sctp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "sctp_abort {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); + - ds_clear(actions); - ds_put_format(actions, 
"eth.dst = %s; next;", - router_port->lrp_networks.ea_s); @@ -11419,50 +16095,25 @@ index 5a3227568..c81e3220c 100644 - S_ROUTER_IN_ARP_RESOLVE, 100, - ds_cstr(match), ds_cstr(actions), - &op->nbsp->header_); -+ /* For distributed router NAT, determine whether this NAT rule -+ * satisfies the conditions for distributed NAT processing. */ -+ bool distributed = false; -+ struct eth_addr mac; -+ if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && -+ nat->logical_port && nat->external_mac) { -+ if (eth_addr_from_string(nat->external_mac, &mac)) { -+ distributed = true; -+ } else { -+ static struct vlog_rate_limit rl = -+ VLOG_RATE_LIMIT_INIT(5, 1); -+ VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " -+ ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); -+ continue; -+ } ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; " ++ "icmp4.code = 2; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 70, ds_cstr(match), action, ++ &op->nbrp->header_); } -- } + } - } - +- -} -+ /* Ingress UNSNAT table: It is for already established connections' -+ * reverse traffic. i.e., SNAT has already been done in egress -+ * pipeline and now the packet has entered the ingress pipeline as -+ * part of a reply. We undo the SNAT here. -+ * -+ * Undoing SNAT has to happen before DNAT processing. This is -+ * because when the packet was DNATed in ingress pipeline, it did -+ * not know about the possibility of eventual additional SNAT in -+ * egress pipeline. */ -+ if (!strcmp(nat->type, "snat") -+ || !strcmp(nat->type, "dnat_and_snat")) { -+ if (!od->l3dgw_port) { -+ /* Gateway router. */ -+ ds_clear(match); -+ ds_clear(actions); -+ ds_put_format(match, "ip && ip%s.dst == %s", -+ is_v6 ? 
"6" : "4", -+ nat->external_ip); -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "ip%s.dst=%s; next;", -+ is_v6 ? "6" : "4", nat->logical_ip); -+ } else { -+ ds_put_cstr(actions, "ct_snat;"); -+ } -/* Local router ingress table CHK_PKT_LEN: Check packet length. - * @@ -11484,32 +16135,39 @@ index 5a3227568..c81e3220c 100644 - struct ds *match, struct ds *actions) -{ - if (od->nbr) { -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, -+ 90, ds_cstr(match), -+ ds_cstr(actions), -+ &nat->header_); -+ } else { -+ /* Distributed router. */ ++ /* Drop IP traffic destined to router owned IPs except if the IP is ++ * also a SNAT IP. Those are dropped later, in stage ++ * "lr_in_arp_resolve", if unSNAT was unsuccessful. ++ * ++ * If op->pd->lb_force_snat_router_ip is true, it means the IP of the ++ * router port is also SNAT IP. ++ * ++ * Priority 60. ++ */ ++ if (!op->od->lb_force_snat_router_ip) { ++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, ++ lflows); ++ } ++ /* ARP / ND handling for external IP addresses. ++ * ++ * DNAT and SNAT IP addresses are external IP addresses that need ARP ++ * handling. ++ * ++ * These are already taken care globally, per router. The only ++ * exception is on the l3dgw_port where we might need to use a ++ * different ETH address. ++ */ ++ if (op != op->od->l3dgw_port) { ++ return; ++ } - /* Packets are allowed by default. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1", - "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1", - "next;"); -+ /* Traffic received on l3dgw_port is subject to NAT. */ -+ ds_clear(match); -+ ds_clear(actions); -+ ds_put_format(match, "ip && ip%s.dst == %s" -+ " && inport == %s", -+ is_v6 ? "6" : "4", -+ nat->external_ip, -+ od->l3dgw_port->json_key); -+ if (!distributed && od->l3redirect_port) { -+ /* Flows for NAT rules that are centralized are only -+ * programmed on the gateway chassis. 
*/ -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ od->l3redirect_port->json_key); -+ } ++ for (size_t i = 0; i < op->od->nbr->n_nat; i++) { ++ struct ovn_nat *nat_entry = &op->od->nat_entries[i]; - if (od->l3dgw_port && od->l3redirect_port) { - int gw_mtu = 0; @@ -11520,50 +16178,14 @@ index 5a3227568..c81e3220c 100644 - /* Add the flows only if gateway_mtu is configured. */ - if (gw_mtu <= 0) { - return; -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "ip%s.dst=%s; next;", -+ is_v6 ? "6" : "4", nat->logical_ip); -+ } else { -+ ds_put_cstr(actions, "ct_snat;"); -+ } -+ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, -+ 100, -+ ds_cstr(match), ds_cstr(actions), -+ &nat->header_); -+ } ++ /* Skip entries we failed to parse. */ ++ if (!nat_entry_is_valid(nat_entry)) { ++ continue; } - ds_clear(match); - ds_put_format(match, "outport == %s", od->l3dgw_port->json_key); -+ /* Ingress DNAT table: Packets enter the pipeline with destination -+ * IP address that needs to be DNATted from a external IP address -+ * to a logical IP address. */ -+ if (!strcmp(nat->type, "dnat") -+ || !strcmp(nat->type, "dnat_and_snat")) { -+ if (!od->l3dgw_port) { -+ /* Gateway router. */ -+ /* Packet when it goes from the initiator to destination. -+ * We need to set flags.loopback because the router can -+ * send the packet back through the same interface. */ -+ ds_clear(match); -+ ds_put_format(match, "ip && ip%s.dst == %s", -+ is_v6 ? "6" : "4", -+ nat->external_ip); -+ ds_clear(actions); -+ if (allowed_ext_ips || exempted_ext_ips) { -+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, -+ is_v6, true, mask); -+ } -+ -+ if (dnat_force_snat_ip) { -+ /* Indicate to the future tables that a DNAT has taken -+ * place and a force SNAT needs to be done in the -+ * Egress SNAT table. 
*/ -+ ds_put_format(actions, -+ "flags.force_snat_for_dnat = 1; "); -+ } - +- - ds_clear(actions); - ds_put_format(actions, - REGBIT_PKT_LARGER" = check_pkt_larger(%d);" @@ -11571,52 +16193,21 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50, - ds_cstr(match), ds_cstr(actions), - &od->l3dgw_port->nbrp->header_); -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "flags.loopback = 1; " -+ "ip%s.dst=%s; next;", -+ is_v6 ? "6" : "4", nat->logical_ip); -+ } else { -+ ds_put_format(actions, "flags.loopback = 1; " -+ "ct_dnat(%s", nat->logical_ip); - +- - for (size_t i = 0; i < od->nbr->n_ports; i++) { - struct ovn_port *rp = ovn_port_find(ports, - od->nbr->ports[i]->name); - if (!rp || rp == od->l3dgw_port) { - continue; - } -+ if (nat->external_port_range[0]) { -+ ds_put_format(actions, ",%s", -+ nat->external_port_range); -+ } -+ ds_put_format(actions, ");"); -+ } - +- - if (rp->lrp_networks.ipv4_addrs) { - ds_clear(match); - ds_put_format(match, "inport == %s && outport == %s" - " && ip4 && "REGBIT_PKT_LARGER, - rp->json_key, od->l3dgw_port->json_key); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, -+ ds_cstr(match), ds_cstr(actions), -+ &nat->header_); -+ } else { -+ /* Distributed router. */ - -+ /* Traffic received on l3dgw_port is subject to NAT. */ -+ ds_clear(match); -+ ds_put_format(match, "ip && ip%s.dst == %s" -+ " && inport == %s", -+ is_v6 ? "6" : "4", -+ nat->external_ip, -+ od->l3dgw_port->json_key); -+ if (!distributed && od->l3redirect_port) { -+ /* Flows for NAT rules that are centralized are only -+ * programmed on the gateway chassis. 
*/ -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ od->l3redirect_port->json_key); -+ } - ds_clear(actions); +- +- ds_clear(actions); - /* Set icmp4.frag_mtu to gw_mtu */ - ds_put_format(actions, - "icmp4_error {" @@ -11635,55 +16226,29 @@ index 5a3227568..c81e3220c 100644 - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); - ovn_lflow_add_with_hint(lflows, od, - S_ROUTER_IN_LARGER_PKTS, 50, -+ if (allowed_ext_ips || exempted_ext_ips) { -+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, -+ is_v6, true, mask); -+ } -+ -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "ip%s.dst=%s; next;", -+ is_v6 ? "6" : "4", nat->logical_ip); -+ } else { -+ ds_put_format(actions, "ct_dnat(%s", nat->logical_ip); -+ if (nat->external_port_range[0]) { -+ ds_put_format(actions, ",%s", -+ nat->external_port_range); -+ } -+ ds_put_format(actions, ");"); -+ } -+ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, - ds_cstr(match), ds_cstr(actions), +- ds_cstr(match), ds_cstr(actions), - &rp->nbrp->header_); -+ &nat->header_); - } +- } ++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately ++ * below. ++ */ ++ if (!strcmp(nat_entry->nb->type, "snat")) { ++ continue; + } ++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); ++ } - if (rp->lrp_networks.ipv6_addrs) { -+ /* ARP resolve for NAT IPs. */ -+ if (od->l3dgw_port) { -+ if (!strcmp(nat->type, "snat")) { - ds_clear(match); +- ds_clear(match); - ds_put_format(match, "inport == %s && outport == %s" - " && ip6 && "REGBIT_PKT_LARGER, - rp->json_key, od->l3dgw_port->json_key); -+ ds_put_format( -+ match, "inport == %s && %s == %s", -+ od->l3dgw_port->json_key, -+ is_v6 ? "ip6.src" : "ip4.src", nat->external_ip); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, -+ 120, ds_cstr(match), "next;", -+ &nat->header_); -+ } ++ /* Now handle SNAT entries too, one per unique SNAT IP. 
*/ ++ struct shash_node *snat_snode; ++ SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { ++ struct ovn_snat_ip *snat_ip = snat_snode->data; -+ if (!sset_contains(&nat_entries, nat->external_ip)) { -+ ds_clear(match); -+ ds_put_format( -+ match, "outport == %s && %s == %s", -+ od->l3dgw_port->json_key, -+ is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4, -+ nat->external_ip); - ds_clear(actions); +- ds_clear(actions); - /* Set icmp6.frag_mtu to gw_mtu */ - ds_put_format(actions, - "icmp6_error {" @@ -11700,30 +16265,23 @@ index 5a3227568..c81e3220c 100644 - rp->lrp_networks.ipv6_addrs[0].addr_s, - gw_mtu, - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); -+ ds_put_format( -+ actions, "eth.dst = %s; next;", -+ distributed ? nat->external_mac : -+ od->l3dgw_port->lrp_networks.ea_s); - ovn_lflow_add_with_hint(lflows, od, +- ovn_lflow_add_with_hint(lflows, od, - S_ROUTER_IN_LARGER_PKTS, 50, - ds_cstr(match), ds_cstr(actions), - &rp->nbrp->header_); -+ S_ROUTER_IN_ARP_RESOLVE, -+ 100, ds_cstr(match), -+ ds_cstr(actions), -+ &nat->header_); -+ sset_add(&nat_entries, nat->external_ip); - } -+ } else { -+ /* Add the NAT external_ip to the nat_entries even for -+ * gateway routers. This is required for adding load balancer -+ * flows.*/ -+ sset_add(&nat_entries, nat->external_ip); +- } ++ if (ovs_list_is_empty(&snat_ip->snat_entries)) { ++ continue; } -- } -- } --} -- ++ ++ struct ovn_nat *nat_entry = ++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), ++ struct ovn_nat, ext_addr_list_node); ++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); + } + } + } + -/* Logical router ingress table GW_REDIRECT: Gateway redirect. - * - * For traffic with outport equal to the l3dgw_port @@ -11731,19 +16289,47 @@ index 5a3227568..c81e3220c 100644 - * of the traffic to the l3redirect_port which represents - * the central instance of the l3dgw_port. 
- */ --static void + static void -build_gateway_redirect_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows, - struct ds *match, struct ds *actions) --{ ++build_lrouter_in_unsnat_flow(struct hmap *lflows, struct ovn_datapath *od, ++ const struct nbrec_nat *nat, struct ds *match, ++ struct ds *actions, bool distributed, bool is_v6) + { - if (od->nbr) { - if (od->l3dgw_port && od->l3redirect_port) { - const struct ovsdb_idl_row *stage_hint = NULL; -- ++ /* Ingress UNSNAT table: It is for already established connections' ++ * reverse traffic. i.e., SNAT has already been done in egress ++ * pipeline and now the packet has entered the ingress pipeline as ++ * part of a reply. We undo the SNAT here. ++ * ++ * Undoing SNAT has to happen before DNAT processing. This is ++ * because when the packet was DNATed in ingress pipeline, it did ++ * not know about the possibility of eventual additional SNAT in ++ * egress pipeline. */ ++ if (strcmp(nat->type, "snat") && strcmp(nat->type, "dnat_and_snat")) { ++ return; ++ } + - if (od->l3dgw_port->nbrp) { - stage_hint = &od->l3dgw_port->nbrp->header_; - } -- ++ bool stateless = lrouter_nat_is_stateless(nat); ++ if (!od->l3dgw_port) { ++ /* Gateway router. */ ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, "ip && ip%s.dst == %s", ++ is_v6 ? "6" : "4", nat->external_ip); ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.dst=%s; next;", ++ is_v6 ? 
"6" : "4", nat->logical_ip); ++ } else { ++ ds_put_cstr(actions, "ct_snat;"); ++ } + - /* For traffic with outport == l3dgw_port, if the - * packet did not match any higher priority redirect - * rule, then the traffic is redirected to the central @@ -11753,63 +16339,65 @@ index 5a3227568..c81e3220c 100644 - od->l3dgw_port->json_key); - ds_clear(actions); - ds_put_format(actions, "outport = %s; next;", -- od->l3redirect_port->json_key); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, ++ 90, ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } else { ++ /* Distributed router. */ ++ ++ /* Traffic received on l3dgw_port is subject to NAT. */ ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, "ip && ip%s.dst == %s && inport == %s", ++ is_v6 ? "6" : "4", nat->external_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. */ ++ ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, - ds_cstr(match), ds_cstr(actions), - stage_hint); -- } + } - /* Packets are allowed by default. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); -- } --} -+ /* Egress UNDNAT table: It is for already established connections' -+ * reverse traffic. i.e., DNAT has already been done in ingress -+ * pipeline and now the packet has entered the egress pipeline as -+ * part of a reply. We undo the DNAT here. -+ * -+ * Note that this only applies for NAT on a distributed router. -+ * Undo DNAT on a gateway router is done in the ingress DNAT -+ * pipeline stage. */ -+ if (od->l3dgw_port && (!strcmp(nat->type, "dnat") -+ || !strcmp(nat->type, "dnat_and_snat"))) { -+ ds_clear(match); -+ ds_put_format(match, "ip && ip%s.src == %s" -+ " && outport == %s", -+ is_v6 ? 
"6" : "4", -+ nat->logical_ip, -+ od->l3dgw_port->json_key); -+ if (!distributed && od->l3redirect_port) { -+ /* Flows for NAT rules that are centralized are only -+ * programmed on the gateway chassis. */ -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ od->l3redirect_port->json_key); -+ } -+ ds_clear(actions); -+ if (distributed) { -+ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", -+ ETH_ADDR_ARGS(mac)); -+ } ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.dst=%s; next;", ++ is_v6 ? "6" : "4", nat->logical_ip); ++ } else { ++ ds_put_cstr(actions, "ct_snat;"); ++ } ++ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, ++ 100, ds_cstr(match), ds_cstr(actions), ++ &nat->header_); + } + } -/* Local router ingress table ARP_REQUEST: ARP request. - * - * In the common case where the Ethernet destination has been resolved, - * this table outputs the packet (priority 0). Otherwise, it composes - * and sends an ARP/IPv6 NA request (priority 100). */ --static void + static void -build_arp_request_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows, - struct ds *match, struct ds *actions) --{ ++build_lrouter_in_dnat_flow(struct hmap *lflows, struct ovn_datapath *od, ++ const struct nbrec_nat *nat, struct ds *match, ++ struct ds *actions, bool distributed, ++ ovs_be32 mask, bool is_v6) + { - if (od->nbr) { - for (int i = 0; i < od->nbr->n_static_routes; i++) { - const struct nbrec_logical_router_static_route *route; -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "ip%s.src=%s; next;", -+ is_v6 ? "6" : "4", nat->external_ip); -+ } else { -+ ds_put_format(actions, "ct_dnat;"); -+ } ++ /* Ingress DNAT table: Packets enter the pipeline with destination ++ * IP address that needs to be DNATted from a external IP address ++ * to a logical IP address. 
*/ ++ if (!strcmp(nat->type, "dnat") || !strcmp(nat->type, "dnat_and_snat")) { ++ bool stateless = lrouter_nat_is_stateless(nat); - route = od->nbr->static_routes[i]; - struct in6_addr gw_ip6; @@ -11818,9 +16406,18 @@ index 5a3227568..c81e3220c 100644 - if (error || plen != 128) { - free(error); - continue; -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, -+ ds_cstr(match), ds_cstr(actions), -+ &nat->header_); ++ if (!od->l3dgw_port) { ++ /* Gateway router. */ ++ /* Packet when it goes from the initiator to destination. ++ * We need to set flags.loopback because the router can ++ * send the packet back through the same interface. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.dst == %s", ++ is_v6 ? "6" : "4", nat->external_ip); ++ ds_clear(actions); ++ if (nat->allowed_ext_ips || nat->exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, true, mask); } - ds_clear(match); @@ -11831,11 +16428,47 @@ index 5a3227568..c81e3220c 100644 - struct eth_addr eth_dst; - in6_addr_solicited_node(&sn_addr, &gw_ip6); - ipv6_multicast_to_ethernet(ð_dst, &sn_addr); -- ++ if (!lport_addresses_is_empty(&od->dnat_force_snat_addrs)) { ++ /* Indicate to the future tables that a DNAT has taken ++ * place and a force SNAT needs to be done in the ++ * Egress SNAT table. */ ++ ds_put_format(actions, "flags.force_snat_for_dnat = 1; "); ++ } + - char sn_addr_s[INET6_ADDRSTRLEN + 1]; - ipv6_string_mapped(sn_addr_s, &sn_addr); -- -- ds_clear(actions); ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "flags.loopback = 1; " ++ "ip%s.dst=%s; next;", ++ is_v6 ? 
"6" : "4", nat->logical_ip); ++ } else { ++ ds_put_format(actions, "flags.loopback = 1; ct_dnat(%s", ++ nat->logical_ip); ++ ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", nat->external_port_range); ++ } ++ ds_put_format(actions, ");"); ++ } + ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } else { ++ /* Distributed router. */ ++ ++ /* Traffic received on l3dgw_port is subject to NAT. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.dst == %s && inport == %s", ++ is_v6 ? "6" : "4", nat->external_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. */ ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } + ds_clear(actions); - ds_put_format(actions, - "nd_ns { " - "eth.dst = "ETH_ADDR_FMT"; " @@ -11844,11 +16477,28 @@ index 5a3227568..c81e3220c 100644 - "output; " - "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s, - route->nexthop); -- ++ if (nat->allowed_ext_ips || nat->exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, true, mask); ++ } + - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200, -- ds_cstr(match), ds_cstr(actions), ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.dst=%s; next;", ++ is_v6 ? 
"6" : "4", nat->logical_ip); ++ } else { ++ ds_put_format(actions, "ct_dnat(%s", nat->logical_ip); ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", nat->external_port_range); ++ } ++ ds_put_format(actions, ");"); ++ } ++ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, + ds_cstr(match), ds_cstr(actions), - &route->header_); -- } ++ &nat->header_); + } - - ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, - "eth.dst == 00:00:00:00:00:00 && ip4", @@ -11866,20 +16516,8 @@ index 5a3227568..c81e3220c 100644 - "output; " - "};"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); -- } --} -+ /* Egress SNAT table: Packets enter the egress pipeline with -+ * source ip address that needs to be SNATted to a external ip -+ * address. */ -+ if (!strcmp(nat->type, "snat") -+ || !strcmp(nat->type, "dnat_and_snat")) { -+ if (!od->l3dgw_port) { -+ /* Gateway router. */ -+ ds_clear(match); -+ ds_put_format(match, "ip && ip%s.src == %s", -+ is_v6 ? "6" : "4", -+ nat->logical_ip); -+ ds_clear(actions); + } + } -/* Logical router egress table DELIVERY: Delivery (priority 100-110). - * @@ -11888,22 +16526,22 @@ index 5a3227568..c81e3220c 100644 - * mac before delivering to enabled logical ports. IP multicast traffic - * bypasses S_ROUTER_IN_IP_ROUTING route lookups. - */ --static void + static void -build_egress_delivery_flows_for_lrouter_port( - struct ovn_port *op, struct hmap *lflows, - struct ds *match, struct ds *actions) --{ ++build_lrouter_out_undnat_flow(struct hmap *lflows, struct ovn_datapath *od, ++ const struct nbrec_nat *nat, struct ds *match, ++ struct ds *actions, bool distributed, ++ struct eth_addr mac, bool is_v6) + { - if (op->nbrp) { - if (!lrport_is_enabled(op->nbrp)) { - /* Drop packets to disabled logical ports (since logical flow - * tables are default-drop). 
*/ - return; - } -+ if (allowed_ext_ips || exempted_ext_ips) { -+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, -+ is_v6, false, mask); -+ } - +- - if (op->derived) { - /* No egress packets should be processed in the context of - * a chassisredirect port. The chassisredirect port should @@ -11911,12 +16549,18 @@ index 5a3227568..c81e3220c 100644 - * pipeline stage before egress processing. */ - return; - } -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "ip%s.src=%s; next;", -+ is_v6 ? "6" : "4", nat->external_ip); -+ } else { -+ ds_put_format(actions, "ct_snat(%s", -+ nat->external_ip); ++ /* Egress UNDNAT table: It is for already established connections' ++ * reverse traffic. i.e., DNAT has already been done in ingress ++ * pipeline and now the packet has entered the egress pipeline as ++ * part of a reply. We undo the DNAT here. ++ * ++ * Note that this only applies for NAT on a distributed router. ++ * Undo DNAT on a gateway router is done in the ingress DNAT ++ * pipeline stage. */ ++ if (!od->l3dgw_port || ++ (strcmp(nat->type, "dnat") && strcmp(nat->type, "dnat_and_snat"))) { ++ return; ++ } - /* If multicast relay is enabled then also adjust source mac for IP - * multicast traffic. @@ -11931,49 +16575,48 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110, - ds_cstr(match), ds_cstr(actions)); - } -+ if (nat->external_port_range[0]) { -+ ds_put_format(actions, ",%s", -+ nat->external_port_range); -+ } -+ ds_put_format(actions, ");"); -+ } ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.src == %s && outport == %s", ++ is_v6 ? "6" : "4", nat->logical_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. 
*/ ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ ds_clear(actions); ++ if (distributed) { ++ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", ++ ETH_ADDR_ARGS(mac)); ++ } - ds_clear(match); - ds_put_format(match, "outport == %s", op->json_key); - ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, - ds_cstr(match), "output;"); -- } -+ /* The priority here is calculated such that the -+ * nat->logical_ip with the longest mask gets a higher -+ * priority. */ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, -+ cidr_bits + 1, -+ ds_cstr(match), ds_cstr(actions), -+ &nat->header_); -+ } else { -+ uint16_t priority = cidr_bits + 1; ++ if (!strcmp(nat->type, "dnat_and_snat") && ++ lrouter_nat_is_stateless(nat)) { ++ ds_put_format(actions, "ip%s.src=%s; next;", ++ is_v6 ? "6" : "4", nat->external_ip); ++ } else { ++ ds_put_format(actions, "ct_dnat;"); + } --} -+ /* Distributed router. */ -+ ds_clear(match); -+ ds_put_format(match, "ip && ip%s.src == %s" -+ " && outport == %s", -+ is_v6 ? "6" : "4", -+ nat->logical_ip, -+ od->l3dgw_port->json_key); -+ if (!distributed && od->l3redirect_port) { -+ /* Flows for NAT rules that are centralized are only -+ * programmed on the gateway chassis. 
*/ -+ priority += 128; -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ od->l3redirect_port->json_key); -+ } -+ ds_clear(actions); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); + } --static void + static void -build_misc_local_traffic_drop_flows_for_lrouter( - struct ovn_datapath *od, struct hmap *lflows) --{ ++build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od, ++ const struct nbrec_nat *nat, struct ds *match, ++ struct ds *actions, bool distributed, ++ struct eth_addr mac, ovs_be32 mask, ++ int cidr_bits, bool is_v6) + { - if (od->nbr) { - /* L3 admission control: drop multicast and broadcast source, localhost - * source or destination, and zero network source or destination @@ -11986,10 +16629,12 @@ index 5a3227568..c81e3220c 100644 - "ip4.src == 0.0.0.0/8 || " - "ip4.dst == 0.0.0.0/8", - "drop;"); -+ if (allowed_ext_ips || exempted_ext_ips) { -+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, -+ is_v6, false, mask); -+ } ++ /* Egress SNAT table: Packets enter the egress pipeline with ++ * source ip address that needs to be SNATted to a external ip ++ * address. */ ++ if (strcmp(nat->type, "snat") && strcmp(nat->type, "dnat_and_snat")) { ++ return; ++ } - /* Drop ARP packets (priority 85). ARP request packets for router's own - * IPs are handled with priority-90 flows. @@ -11998,135 +16643,117 @@ index 5a3227568..c81e3220c 100644 - */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, - "arp || nd", "drop;"); -+ if (distributed) { -+ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", -+ ETH_ADDR_ARGS(mac)); -+ } ++ bool stateless = lrouter_nat_is_stateless(nat); ++ if (!od->l3dgw_port) { ++ /* Gateway router. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.src == %s", ++ is_v6 ? 
"6" : "4", nat->logical_ip); ++ ds_clear(actions); - /* Allow IPv6 multicast traffic that's supposed to reach the - * router pipeline (e.g., router solicitations). - */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra", - "next;"); -+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { -+ ds_put_format(actions, "ip%s.src=%s; next;", -+ is_v6 ? "6" : "4", nat->external_ip); -+ } else { -+ ds_put_format(actions, "ct_snat(%s", -+ nat->external_ip); -+ if (nat->external_port_range[0]) { -+ ds_put_format(actions, ",%s", -+ nat->external_port_range); -+ } -+ ds_put_format(actions, ");"); -+ } ++ if (nat->allowed_ext_ips || nat->exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, false, mask); ++ } - /* Drop other reserved multicast. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83, - "ip6.mcast_rsvd", "drop;"); -+ /* The priority here is calculated such that the -+ * nat->logical_ip with the longest mask gets a higher -+ * priority. */ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, -+ priority, ds_cstr(match), -+ ds_cstr(actions), -+ &nat->header_); -+ } ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.src=%s; next;", ++ is_v6 ? "6" : "4", nat->external_ip); ++ } else { ++ ds_put_format(actions, "ct_snat(%s", nat->external_ip); ++ ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", ++ nat->external_port_range); + } ++ ds_put_format(actions, ");"); ++ } ++ ++ /* The priority here is calculated such that the ++ * nat->logical_ip with the longest mask gets a higher ++ * priority. */ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, ++ cidr_bits + 1, ds_cstr(match), ++ ds_cstr(actions), &nat->header_); ++ } else { ++ uint16_t priority = cidr_bits + 1; ++ ++ /* Distributed router. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.src == %s && outport == %s", ++ is_v6 ? 
"6" : "4", nat->logical_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. */ ++ priority += 128; ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ ds_clear(actions); - /* Allow other multicast if relay enabled (priority 82). */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82, - "ip4.mcast || ip6.mcast", - od->mcast_info.rtr.relay ? "next;" : "drop;"); -+ /* Logical router ingress table 0: -+ * For NAT on a distributed router, add rules allowing -+ * ingress traffic with eth.dst matching nat->external_mac -+ * on the l3dgw_port instance where nat->logical_port is -+ * resident. */ -+ if (distributed) { -+ /* Store the ethernet address of the port receiving the packet. -+ * This will save us from having to match on inport further -+ * down in the pipeline. -+ */ -+ ds_clear(actions); -+ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", -+ od->l3dgw_port->lrp_networks.ea_s); ++ if (nat->allowed_ext_ips || nat->exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, false, mask); ++ } - /* Drop Ethernet local broadcast. 
By definition this traffic should - * not be forwarded.*/ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, - "eth.bcast", "drop;"); -+ ds_clear(match); -+ ds_put_format(match, -+ "eth.dst == "ETH_ADDR_FMT" && inport == %s" -+ " && is_chassis_resident(\"%s\")", -+ ETH_ADDR_ARGS(mac), -+ od->l3dgw_port->json_key, -+ nat->logical_port); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, -+ ds_cstr(match), ds_cstr(actions), -+ &nat->header_); -+ } ++ if (distributed) { ++ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", ++ ETH_ADDR_ARGS(mac)); ++ } - /* TTL discard */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, - "ip4 && ip.ttl == {0, 1}", "drop;"); -+ /* Ingress Gateway Redirect Table: For NAT on a distributed -+ * router, add flows that are specific to a NAT rule. These -+ * flows indicate the presence of an applicable NAT rule that -+ * can be applied in a distributed manner. -+ * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to -+ * NAT external IP and NAT external mac so the ARP request -+ * generated in the following stage is sent out with proper IP/MAC -+ * src addresses. -+ */ -+ if (distributed) { -+ ds_clear(match); -+ ds_clear(actions); -+ ds_put_format(match, -+ "ip%s.src == %s && outport == %s && " -+ "is_chassis_resident(\"%s\")", -+ is_v6 ? "6" : "4", nat->logical_ip, -+ od->l3dgw_port->json_key, nat->logical_port); -+ ds_put_format(actions, "eth.src = %s; %s = %s; next;", -+ nat->external_mac, -+ is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4, -+ nat->external_ip); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, -+ 100, ds_cstr(match), -+ ds_cstr(actions), &nat->header_); ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.src=%s; next;", ++ is_v6 ? 
"6" : "4", nat->external_ip); ++ } else { ++ ds_put_format(actions, "ct_snat(%s", ++ nat->external_ip); ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", nat->external_port_range); + } ++ ds_put_format(actions, ");"); ++ } - /* Pass other traffic not already handled to the next table for - * routing. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); -- } --} -+ /* Egress Loopback table: For NAT on a distributed router. -+ * If packets in the egress pipeline on the distributed -+ * gateway port have ip.dst matching a NAT external IP, then -+ * loop a clone of the packet back to the beginning of the -+ * ingress pipeline with inport = outport. */ -+ if (od->l3dgw_port) { -+ /* Distributed router. */ -+ ds_clear(match); -+ ds_put_format(match, "ip%s.dst == %s && outport == %s", -+ is_v6 ? "6" : "4", -+ nat->external_ip, -+ od->l3dgw_port->json_key); -+ if (!distributed) { -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ od->l3redirect_port->json_key); -+ } else { -+ ds_put_format(match, " && is_chassis_resident(\"%s\")", -+ nat->logical_port); -+ } ++ /* The priority here is calculated such that the ++ * nat->logical_ip with the longest mask gets a higher ++ * priority. 
*/ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, ++ priority, ds_cstr(match), ++ ds_cstr(actions), &nat->header_); + } + } --static void + static void -build_dhcpv6_reply_flows_for_lrouter_port( - struct ovn_port *op, struct hmap *lflows, - struct ds *match) --{ ++build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, ++ const struct nbrec_nat *nat, struct ds *match, ++ struct ds *actions, struct eth_addr mac, ++ bool distributed, bool is_v6) + { - if (op->nbrp && (!op->derived)) { - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - ds_clear(match); @@ -12136,55 +16763,53 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, - ds_cstr(match), - "reg0 = 0; handle_dhcpv6_reply;"); -+ ds_clear(actions); -+ ds_put_format(actions, -+ "clone { ct_clear; " -+ "inport = outport; outport = \"\"; " -+ "flags = 0; flags.loopback = 1; "); -+ for (int j = 0; j < MFF_N_LOG_REGS; j++) { -+ ds_put_format(actions, "reg%d = 0; ", j); -+ } -+ ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; " -+ "next(pipeline=ingress, table=%d); };", -+ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, -+ ds_cstr(match), ds_cstr(actions), -+ &nat->header_); -+ } - } -- } +- } ++ if (od->l3dgw_port && !strcmp(nat->type, "snat")) { ++ ds_clear(match); ++ ds_put_format( ++ match, "inport == %s && %s == %s", ++ od->l3dgw_port->json_key, ++ is_v6 ? "ip6.src" : "ip4.src", nat->external_ip); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, ++ 120, ds_cstr(match), "next;", ++ &nat->header_); + } ++ /* Logical router ingress table 0: ++ * For NAT on a distributed router, add rules allowing ++ * ingress traffic with eth.dst matching nat->external_mac ++ * on the l3dgw_port instance where nat->logical_port is ++ * resident. */ ++ if (distributed) { ++ /* Store the ethernet address of the port receiving the packet. 
++ * This will save us from having to match on inport further ++ * down in the pipeline. ++ */ ++ ds_clear(actions); ++ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", ++ od->l3dgw_port->lrp_networks.ea_s); --} -+ /* Handle force SNAT options set in the gateway router. */ -+ if (!od->l3dgw_port) { -+ if (dnat_force_snat_ip) { -+ if (od->dnat_force_snat_addrs.n_ipv4_addrs) { -+ build_lrouter_force_snat_flows(lflows, od, "4", -+ od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s, -+ "dnat"); -+ } -+ if (od->dnat_force_snat_addrs.n_ipv6_addrs) { -+ build_lrouter_force_snat_flows(lflows, od, "6", -+ od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s, -+ "dnat"); -+ } -+ } -+ if (lb_force_snat_ip) { -+ if (od->lb_force_snat_addrs.n_ipv4_addrs) { -+ build_lrouter_force_snat_flows(lflows, od, "4", -+ od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); -+ } -+ if (od->lb_force_snat_addrs.n_ipv6_addrs) { -+ build_lrouter_force_snat_flows(lflows, od, "6", -+ od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); -+ } -+ } ++ ds_clear(match); ++ ds_put_format(match, ++ "eth.dst == "ETH_ADDR_FMT" && inport == %s" ++ " && is_chassis_resident(\"%s\")", ++ ETH_ADDR_ARGS(mac), ++ od->l3dgw_port->json_key, ++ nat->logical_port); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } + } -static void -build_ipv6_input_flows_for_lrouter_port( - struct ovn_port *op, struct hmap *lflows, - struct ds *match, struct ds *actions) --{ ++static int ++lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat *nat, ++ ovs_be32 *mask, bool *is_v6, int *cidr_bits, ++ struct eth_addr *mac, bool *distributed) + { - if (op->nbrp && (!op->derived)) { - /* No ingress packets are accepted on a chassisredirect - * port, so no need to program flows for that port. 
*/ @@ -12195,18 +16820,8 @@ index 5a3227568..c81e3220c 100644 - ds_put_cstr(match, "ip6.dst == "); - op_put_v6_networks(match, op); - ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0"); -+ /* For gateway router, re-circulate every packet through -+ * the DNAT zone. This helps with the following. -+ * -+ * Any packet that needs to be unDNATed in the reverse -+ * direction gets unDNATed. Ideally this could be done in -+ * the egress pipeline. But since the gateway router -+ * does not have any feature that depends on the source -+ * ip address being external IP address for IP routing, -+ * we can do it here, saving a future re-circulation. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, -+ "ip", "flags.loopback = 1; ct_dnat;"); -+ } ++ struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; ++ ovs_be32 ip; - const char *lrp_actions = - "ip6.dst <-> ip6.src; " @@ -12217,12 +16832,34 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, - ds_cstr(match), lrp_actions, - &op->nbrp->header_); -+ /* Load balancing and packet defrag are only valid on -+ * Gateway routers or router with gateway port. 
*/ -+ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { -+ sset_destroy(&nat_entries); -+ return; ++ if (nat->allowed_ext_ips && nat->exempted_ext_ips) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); ++ VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since " ++ "both allowed and exempt external ips set", ++ UUID_ARGS(&(nat->header_.uuid))); ++ return -EINVAL; ++ } ++ ++ char *error = ip_parse_masked(nat->external_ip, &ip, mask); ++ *is_v6 = false; ++ ++ if (error || *mask != OVS_BE32_MAX) { ++ free(error); ++ error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); ++ if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) { ++ /* Invalid for both IPv4 and IPv6 */ ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad external ip %s for nat", ++ nat->external_ip); ++ free(error); ++ return -EINVAL; } ++ /* It was an invalid IPv4 address, but valid IPv6. ++ * Treat the rest of the handling of this NAT rule ++ * as IPv6. */ ++ *is_v6 = true; ++ } - /* ND reply. These flows reply to ND solicitations for the - * router's own IP address. */ @@ -12238,20 +16875,60 @@ index 5a3227568..c81e3220c 100644 - ds_put_format(match, "is_chassis_resident(%s)", - op->od->l3redirect_port->json_key); - } -+ /* A set to hold all ips that need defragmentation and tracking. */ -+ struct sset all_ips = SSET_INITIALIZER(&all_ips); ++ /* Check the validity of nat->logical_ip. 'logical_ip' can ++ * be a subnet when the type is "snat". 
*/ ++ if (*is_v6) { ++ error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6); ++ *cidr_bits = ipv6_count_cidr_bits(&mask_v6); ++ } else { ++ error = ip_parse_masked(nat->logical_ip, &ip, mask); ++ *cidr_bits = ip_count_cidr_bits(*mask); ++ } ++ if (!strcmp(nat->type, "snat")) { ++ if (error) { ++ /* Invalid for both IPv4 and IPv6 */ ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " ++ "in router "UUID_FMT"", ++ nat->logical_ip, UUID_ARGS(&od->key)); ++ free(error); ++ return -EINVAL; ++ } ++ } else { ++ if (error || (*is_v6 == false && *mask != OVS_BE32_MAX) ++ || (*is_v6 && memcmp(&mask_v6, &v6_exact, ++ sizeof mask_v6))) { ++ /* Invalid for both IPv4 and IPv6 */ ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " ++ ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); ++ free(error); ++ return -EINVAL; ++ } ++ } - build_lrouter_nd_flow(op->od, op, "nd_na_router", - op->lrp_networks.ipv6_addrs[i].addr_s, - op->lrp_networks.ipv6_addrs[i].sn_addr_s, - REG_INPORT_ETH_ADDR, match, false, 90, - &op->nbrp->header_, lflows); -- } -+ for (int i = 0; i < od->nbr->n_load_balancer; i++) { -+ struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; -+ struct ovn_northd_lb *lb = -+ ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); -+ ovs_assert(lb); ++ /* For distributed router NAT, determine whether this NAT rule ++ * satisfies the conditions for distributed NAT processing. 
*/ ++ *distributed = false; ++ if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && ++ nat->logical_port && nat->external_mac) { ++ if (eth_addr_from_string(nat->external_mac, mac)) { ++ *distributed = true; ++ } else { ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " ++ ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); ++ return -EINVAL; + } ++ } - /* UDP/TCP port unreachable */ - if (!smap_get(&op->od->nbr->options, "chassis") @@ -12268,12 +16945,8 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, - 80, ds_cstr(match), action, - &op->nbrp->header_); -+ for (size_t j = 0; j < lb->n_vips; j++) { -+ struct ovn_lb_vip *lb_vip = &lb->vips[j]; -+ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; -+ ds_clear(actions); -+ build_lb_vip_actions(lb_vip, lb_vip_nb, actions, -+ lb->selection_fields, false); ++ return 0; ++} - ds_clear(match); - ds_put_format(match, @@ -12289,35 +16962,19 @@ index 5a3227568..c81e3220c 100644 - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, - 80, ds_cstr(match), action, - &op->nbrp->header_); -+ if (!sset_contains(&all_ips, lb_vip->vip_str)) { -+ sset_add(&all_ips, lb_vip->vip_str); -+ /* If there are any load balancing rules, we should send -+ * the packet to conntrack for defragmentation and -+ * tracking. This helps with two things. -+ * -+ * 1. With tracking, we can send only new connections to -+ * pick a DNAT ip address from a group. -+ * 2. If there are L4 ports in load balancing rules, we -+ * need the defragmentation to match on L4 ports. 
*/ -+ ds_clear(match); -+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { -+ ds_put_format(match, "ip && ip4.dst == %s", -+ lb_vip->vip_str); -+ } else { -+ ds_put_format(match, "ip && ip6.dst == %s", -+ lb_vip->vip_str); -+ } -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, -+ 100, ds_cstr(match), "ct_next;", -+ &nb_lb->header_); -+ } ++/* NAT, Defrag and load balancing. */ ++static void ++build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, ++ struct hmap *lflows, ++ struct shash *meter_groups, ++ struct hmap *lbs, ++ struct ds *match, struct ds *actions) ++{ ++ if (!od->nbr) { ++ return; ++ } -+ /* Higher priority rules are added for load-balancing in DNAT -+ * table. For every match (on a VIP[:port]), we add two flows -+ * via add_router_lb_flow(). One flow is for specific matching -+ * on ct.new with an action of "ct_lb($targets);". The other -+ * flow is for ct.est with an action of "ct_dnat;". */ - ds_clear(match); +- ds_clear(match); - ds_put_format(match, - "ip6 && ip6.dst == %s && !ip.later_frag", - op->lrp_networks.ipv6_addrs[i].addr_s); @@ -12332,14 +16989,45 @@ index 5a3227568..c81e3220c 100644 - 70, ds_cstr(match), action, - &op->nbrp->header_); - } -- } -+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { -+ ds_put_format(match, "ip && ip4.dst == %s", -+ lb_vip->vip_str); -+ } else { -+ ds_put_format(match, "ip && ip6.dst == %s", -+ lb_vip->vip_str); -+ } ++ /* Packets are allowed by default. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); ++ ++ /* Send the IPv6 NS packets to next table. 
When ovn-controller ++ * generates IPv6 NS (for the action - nd_ns{}), the injected ++ * packet would go through conntrack - which is not required. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); ++ ++ /* NAT rules are only valid on Gateway routers and routers with ++ * l3dgw_port (router has a port with gateway chassis ++ * specified). */ ++ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { ++ return; ++ } ++ ++ struct sset nat_entries = SSET_INITIALIZER(&nat_entries); ++ ++ bool dnat_force_snat_ip = ++ !lport_addresses_is_empty(&od->dnat_force_snat_addrs); ++ bool lb_force_snat_ip = ++ !lport_addresses_is_empty(&od->lb_force_snat_addrs); ++ ++ for (int i = 0; i < od->nbr->n_nat; i++) { ++ const struct nbrec_nat *nat = nat = od->nbr->nat[i]; ++ struct eth_addr mac = eth_addr_broadcast; ++ bool is_v6, distributed; ++ ovs_be32 mask; ++ int cidr_bits; ++ ++ if (lrouter_check_nat_entry(od, nat, &mask, &is_v6, &cidr_bits, ++ &mac, &distributed) < 0) { ++ continue; + } - /* ICMPv6 time exceeded */ - for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { @@ -12347,28 +17035,107 @@ index 5a3227568..c81e3220c 100644 - if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { - continue; - } -+ int prio = 110; -+ bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); -+ bool is_sctp = nullable_string_is_equal(nb_lb->protocol, -+ "sctp"); -+ const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp"; - -- ds_clear(match); -- ds_clear(actions); -+ if (lb_vip->vip_port) { -+ ds_put_format(match, " && %s && %s.dst == %d", proto, -+ proto, lb_vip->vip_port); -+ prio = 120; -+ } - -- ds_put_format(match, ++ /* S_ROUTER_IN_UNSNAT */ ++ build_lrouter_in_unsnat_flow(lflows, od, nat, match, actions, distributed, ++ is_v6); ++ /* S_ROUTER_IN_DNAT */ ++ build_lrouter_in_dnat_flow(lflows, od, nat, match, actions, distributed, ++ mask, is_v6); + ++ /* ARP resolve for NAT IPs. 
*/ ++ if (od->l3dgw_port) { ++ if (!sset_contains(&nat_entries, nat->external_ip)) { ++ ds_clear(match); ++ ds_put_format( ++ match, "outport == %s && %s == %s", ++ od->l3dgw_port->json_key, ++ is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4, ++ nat->external_ip); ++ ds_clear(actions); ++ ds_put_format( ++ actions, "eth.dst = %s; next;", ++ distributed ? nat->external_mac : ++ od->l3dgw_port->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_ARP_RESOLVE, ++ 100, ds_cstr(match), ++ ds_cstr(actions), ++ &nat->header_); ++ sset_add(&nat_entries, nat->external_ip); ++ } ++ } else { ++ /* Add the NAT external_ip to the nat_entries even for ++ * gateway routers. This is required for adding load balancer ++ * flows.*/ ++ sset_add(&nat_entries, nat->external_ip); ++ } ++ ++ /* S_ROUTER_OUT_UNDNAT */ ++ build_lrouter_out_undnat_flow(lflows, od, nat, match, actions, distributed, ++ mac, is_v6); ++ /* S_ROUTER_OUT_SNAT */ ++ build_lrouter_out_snat_flow(lflows, od, nat, match, actions, distributed, ++ mac, mask, cidr_bits, is_v6); ++ ++ /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */ ++ build_lrouter_ingress_flow(lflows, od, nat, match, actions, ++ mac, distributed, is_v6); ++ ++ /* Ingress Gateway Redirect Table: For NAT on a distributed ++ * router, add flows that are specific to a NAT rule. These ++ * flows indicate the presence of an applicable NAT rule that ++ * can be applied in a distributed manner. ++ * In particular REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to ++ * NAT external IP and NAT external mac so the ARP request ++ * generated in the following stage is sent out with proper IP/MAC ++ * src addresses.
++ */ ++ if (distributed) { + ds_clear(match); + ds_clear(actions); +- + ds_put_format(match, - "inport == %s && ip6 && " - "ip6.src == %s/%d && " - "ip.ttl == {0, 1} && !ip.later_frag", - op->json_key, - op->lrp_networks.ipv6_addrs[i].network_s, - op->lrp_networks.ipv6_addrs[i].plen); -- ds_put_format(actions, ++ "ip%s.src == %s && outport == %s && " ++ "is_chassis_resident(\"%s\")", ++ is_v6 ? "6" : "4", nat->logical_ip, ++ od->l3dgw_port->json_key, nat->logical_port); ++ ds_put_format(actions, "eth.src = %s; %s = %s; next;", ++ nat->external_mac, ++ is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4, ++ nat->external_ip); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, ++ 100, ds_cstr(match), ++ ds_cstr(actions), &nat->header_); ++ } ++ ++ /* Egress Loopback table: For NAT on a distributed router. ++ * If packets in the egress pipeline on the distributed ++ * gateway port have ip.dst matching a NAT external IP, then ++ * loop a clone of the packet back to the beginning of the ++ * ingress pipeline with inport = outport. */ ++ if (od->l3dgw_port) { ++ /* Distributed router. */ ++ ds_clear(match); ++ ds_put_format(match, "ip%s.dst == %s && outport == %s", ++ is_v6 ? 
"6" : "4", ++ nat->external_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } else { ++ ds_put_format(match, " && is_chassis_resident(\"%s\")", ++ nat->logical_port); ++ } ++ ++ ds_clear(actions); + ds_put_format(actions, - "icmp6 {" - "eth.dst <-> eth.src; " - "ip6.dst = ip6.src; " @@ -12379,24 +17146,71 @@ index 5a3227568..c81e3220c 100644 - "next; };", - op->lrp_networks.ipv6_addrs[i].addr_s); - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, -- ds_cstr(match), ds_cstr(actions), ++ "clone { ct_clear; " ++ "inport = outport; outport = \"\"; " ++ "flags = 0; flags.loopback = 1; "); ++ for (int j = 0; j < MFF_N_LOG_REGS; j++) { ++ ds_put_format(actions, "reg%d = 0; ", j); ++ } ++ ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; " ++ "next(pipeline=ingress, table=%d); };", ++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, + ds_cstr(match), ds_cstr(actions), - &op->nbrp->header_); -+ if (od->l3redirect_port && -+ (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { -+ ds_put_format(match, " && is_chassis_resident(%s)", -+ od->l3redirect_port->json_key); -+ } -+ bool force_snat_for_lb = -+ lb_force_snat_ip || od->lb_force_snat_router_ip; -+ add_router_lb_flow(lflows, od, match, actions, prio, -+ force_snat_for_lb, lb_vip, proto, -+ nb_lb, meter_groups, &nat_entries); ++ &nat->header_); ++ } ++ } ++ ++ /* Handle force SNAT options set in the gateway router. 
*/ ++ if (!od->l3dgw_port) { ++ if (dnat_force_snat_ip) { ++ if (od->dnat_force_snat_addrs.n_ipv4_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "4", ++ od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s, ++ "dnat"); ++ } ++ if (od->dnat_force_snat_addrs.n_ipv6_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "6", ++ od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s, ++ "dnat"); ++ } ++ } ++ if (lb_force_snat_ip) { ++ if (od->lb_force_snat_addrs.n_ipv4_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "4", ++ od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); ++ } ++ if (od->lb_force_snat_addrs.n_ipv6_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "6", ++ od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); + } } -+ sset_destroy(&all_ips); ++ ++ /* For gateway router, re-circulate every packet through ++ * the DNAT zone. This helps with the following. ++ * ++ * Any packet that needs to be unDNATed in the reverse ++ * direction gets unDNATed. Ideally this could be done in ++ * the egress pipeline. But since the gateway router ++ * does not have any feature that depends on the source ++ * ip address being external IP address for IP routing, ++ * we can do it here, saving a future re-circulation. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, ++ "ip", "flags.loopback = 1; ct_dnat;"); ++ } ++ ++ /* Load balancing and packet defrag are only valid on ++ * Gateway routers or router with gateway port. 
*/ ++ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { + sset_destroy(&nat_entries); ++ return; } -- + ++ build_lrouter_lb_flows(lflows, od, lbs, meter_groups, &nat_entries, ++ match, actions); ++ ++ sset_destroy(&nat_entries); } + @@ -12404,7 +17218,7 @@ index 5a3227568..c81e3220c 100644 struct lswitch_flow_build_info { struct hmap *datapaths; struct hmap *ports; -@@ -11177,7 +11918,8 @@ struct lswitch_flow_build_info { +@@ -11177,7 +12002,8 @@ struct lswitch_flow_build_info { static void build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, @@ -12414,7 +17228,7 @@ index 5a3227568..c81e3220c 100644 { /* Build Logical Switch Flows. */ build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups, lsi->lflows, -@@ -11186,13 +11928,20 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, +@@ -11186,13 +12012,20 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, build_fwd_group_lflows(od, lsi->lflows); build_lswitch_lflows_admission_control(od, lsi->lflows); build_lswitch_input_port_sec_od(od, lsi->lflows); @@ -12436,7 +17250,7 @@ index 5a3227568..c81e3220c 100644 build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match, &lsi->actions); build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->ports); -@@ -11204,6 +11953,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, +@@ -11204,6 +12037,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, build_arp_request_flows_for_lrouter(od, lsi->lflows, &lsi->match, &lsi->actions); build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows); @@ -12446,7 +17260,7 @@ index 5a3227568..c81e3220c 100644 } /* Helper function to combine all lflow generation which is iterated by port. -@@ -11216,6 +11968,20 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, +@@ -11216,6 +12052,20 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, /* Build Logical Switch Flows. 
*/ build_lswitch_input_port_sec_op(op, lsi->lflows, &lsi->actions, &lsi->match); @@ -12467,7 +17281,7 @@ index 5a3227568..c81e3220c 100644 /* Build Logical Router Flows. */ build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, -@@ -11232,6 +11998,10 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, +@@ -11232,6 +12082,10 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, build_dhcpv6_reply_flows_for_lrouter_port(op, lsi->lflows, &lsi->match); build_ipv6_input_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, &lsi->actions); @@ -12478,7 +17292,7 @@ index 5a3227568..c81e3220c 100644 } static void -@@ -11239,10 +12009,13 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, +@@ -11239,10 +12093,13 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, struct hmap *port_groups, struct hmap *lflows, struct hmap *mcgroups, struct hmap *igmp_groups, @@ -12493,7 +17307,7 @@ index 5a3227568..c81e3220c 100644 char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); -@@ -11264,22 +12037,28 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, +@@ -11264,22 +12121,28 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, * will move here and will be reogranized by iterator type. */ HMAP_FOR_EACH (od, key_node, datapaths) { @@ -12529,7 +17343,7 @@ index 5a3227568..c81e3220c 100644 } struct ovn_dp_group { -@@ -11356,13 +12135,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, +@@ -11356,13 +12219,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, struct hmap *ports, struct hmap *port_groups, struct hmap *mcgroups, struct hmap *igmp_groups, struct shash *meter_groups, @@ -12546,7 +17360,7 @@ index 5a3227568..c81e3220c 100644 /* Collecting all unique datapath groups. 
*/ struct hmap dp_groups = HMAP_INITIALIZER(&dp_groups); -@@ -11801,17 +12581,20 @@ static void +@@ -11801,17 +12665,20 @@ static void sync_meters_iterate_nb_meter(struct northd_context *ctx, const char *meter_name, const struct nbrec_meter *nb_meter, @@ -12570,7 +17384,7 @@ index 5a3227568..c81e3220c 100644 if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) { struct sbrec_meter_band **sb_bands; -@@ -11833,6 +12616,24 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx, +@@ -11833,6 +12700,24 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx, sbrec_meter_set_unit(sb_meter, nb_meter->unit); } @@ -12595,7 +17409,7 @@ index 5a3227568..c81e3220c 100644 /* Each entry in the Meter and Meter_Band tables in OVN_Northbound have * a corresponding entries in the Meter and Meter_Band tables in * OVN_Southbound. Additionally, ACL logs that use fair meters have -@@ -11840,9 +12641,10 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx, +@@ -11840,9 +12725,10 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx, */ static void sync_meters(struct northd_context *ctx, struct hmap *datapaths, @@ -12607,7 +17421,7 @@ index 5a3227568..c81e3220c 100644 const struct sbrec_meter *sb_meter; SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) { -@@ -11852,7 +12654,7 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths, +@@ -11852,7 +12738,7 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths, const struct nbrec_meter *nb_meter; NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) { sync_meters_iterate_nb_meter(ctx, nb_meter->name, nb_meter, @@ -12616,7 +17430,7 @@ index 5a3227568..c81e3220c 100644 } /* -@@ -11866,19 +12668,28 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths, +@@ -11866,19 +12752,28 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths, continue; } for (size_t i = 0; i < od->nbs->n_acls; i++) { @@ -12654,7 +17468,7 @@ index 5a3227568..c81e3220c 100644 struct shash_node *node, *next; 
SHASH_FOR_EACH_SAFE (node, next, &sb_meters) { sbrec_meter_delete(node->data); -@@ -12274,6 +13085,7 @@ ovnnb_db_run(struct northd_context *ctx, +@@ -12274,6 +13169,7 @@ ovnnb_db_run(struct northd_context *ctx, struct hmap igmp_groups; struct shash meter_groups = SHASH_INITIALIZER(&meter_groups); struct hmap lbs; @@ -12662,15 +17476,18 @@ index 5a3227568..c81e3220c 100644 /* Sync ipsec configuration. * Copy nb_cfg from northbound to southbound database. -@@ -12354,6 +13166,7 @@ ovnnb_db_run(struct northd_context *ctx, +@@ -12354,6 +13250,10 @@ ovnnb_db_run(struct northd_context *ctx, use_logical_dp_groups = smap_get_bool(&nb->options, "use_logical_dp_groups", false); ++ use_ct_inv_match = smap_get_bool(&nb->options, ++ "use_ct_inv_match", true); ++ + /* deprecated, use --event instead */ controller_event_en = smap_get_bool(&nb->options, "controller_event", false); check_lsp_is_up = !smap_get_bool(&nb->options, -@@ -12368,14 +13181,16 @@ ovnnb_db_run(struct northd_context *ctx, +@@ -12368,14 +13268,16 @@ ovnnb_db_run(struct northd_context *ctx, build_ip_mcast(ctx, datapaths); build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); build_meter_groups(ctx, &meter_groups); @@ -12689,7 +17506,7 @@ index 5a3227568..c81e3220c 100644 struct ovn_northd_lb *lb; HMAP_FOR_EACH_POP (lb, hmap_node, &lbs) { -@@ -12393,9 +13208,13 @@ ovnnb_db_run(struct northd_context *ctx, +@@ -12393,9 +13295,13 @@ ovnnb_db_run(struct northd_context *ctx, HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) { ovn_port_group_destroy(&port_groups, pg); } @@ -12703,7 +17520,7 @@ index 5a3227568..c81e3220c 100644 struct shash_node *node, *next; SHASH_FOR_EACH_SAFE (node, next, &meter_groups) { -@@ -12542,7 +13361,17 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports, +@@ -12542,7 +13448,17 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports, continue; } @@ -12722,7 +17539,7 @@ index 5a3227568..c81e3220c 100644 if (!op->nbsp->up 
|| *op->nbsp->up != up) { nbrec_logical_switch_port_set_up(op->nbsp, &up, 1); } -@@ -12690,7 +13519,7 @@ static const char *rbac_encap_update[] = +@@ -12690,7 +13606,7 @@ static const char *rbac_encap_update[] = static const char *rbac_port_binding_auth[] = {""}; static const char *rbac_port_binding_update[] = @@ -12731,7 +17548,7 @@ index 5a3227568..c81e3220c 100644 static const char *rbac_mac_binding_auth[] = {""}; -@@ -13176,6 +14005,8 @@ main(int argc, char *argv[]) +@@ -13176,6 +14092,8 @@ main(int argc, char *argv[]) &sbrec_port_binding_col_ha_chassis_group); ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_virtual_parent); @@ -12740,7 +17557,7 @@ index 5a3227568..c81e3220c 100644 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_chassis); ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name); -@@ -13324,9 +14155,25 @@ main(int argc, char *argv[]) +@@ -13324,9 +14242,25 @@ main(int argc, char *argv[]) add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_name); add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_vips); add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_protocol); @@ -12766,7 +17583,31 @@ index 5a3227568..c81e3220c 100644 struct ovsdb_idl_index *sbrec_chassis_by_name = chassis_index_create(ovnsb_idl_loop.idl); -@@ -13449,6 +14296,7 @@ main(int argc, char *argv[]) +@@ -13350,6 +14284,15 @@ main(int argc, char *argv[]) + state.had_lock = false; + state.paused = false; + while (!exiting) { ++ memory_run(); ++ if (memory_should_report()) { ++ struct simap usage = SIMAP_INITIALIZER(&usage); ++ ++ /* Nothing special to report yet. 
*/ ++ memory_report(&usage); ++ simap_destroy(&usage); ++ } ++ + if (!state.paused) { + if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) && + !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl)) +@@ -13421,6 +14364,7 @@ main(int argc, char *argv[]) + + unixctl_server_run(unixctl); + unixctl_server_wait(unixctl); ++ memory_wait(); + if (exiting) { + poll_immediate_wake(); + } +@@ -13449,6 +14393,7 @@ main(int argc, char *argv[]) } } @@ -12852,10 +17693,32 @@ index 269e3a888..29019809c 100644 "isRoot": true}} } diff --git a/ovn-nb.xml b/ovn-nb.xml -index c9ab25ceb..09b755f1a 100644 +index c9ab25ceb..5f5c2cda0 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml -@@ -1635,6 +1635,24 @@ +@@ -226,6 +226,21 @@ +

    + + ++ ++

    ++ If set to false, ovn-northd will not use the ++ ct.inv field in any of the logical flow matches. ++ The default value is true. If the NIC supports offloading ++ OVS datapath flows but doesn't support offloading ct_state ++ inv flag, then the datapath flows matching on this flag ++ (either +inv or -inv) will not be ++ offloaded. CMS should consider setting use_ct_inv_match ++ to false in such cases. This results in a side effect ++ of the invalid packets getting delivered to the destination VIF, ++ which otherwise would have been dropped by OVN. ++

    ++
    ++ + +

    + These options control how routes are advertised between OVN +@@ -1635,6 +1650,30 @@ See External IDs at the beginning of this document. @@ -12876,11 +17739,17 @@ index c9ab25ceb..09b755f1a 100644 + exactly one IPv4 and/or one IPv6 address on it, separated by a space + character. + ++ ++ ++ If the load balancing rule is configured with skip_snat ++ option, the force_snat_for_lb option configured for the router ++ pipeline will not be applied for this load balancer. ++ + -@@ -1917,16 +1935,29 @@ +@@ -1917,16 +1956,29 @@

    @@ -12920,7 +17789,7 @@ index c9ab25ceb..09b755f1a 100644

    -@@ -2634,6 +2665,13 @@ +@@ -2634,6 +2686,13 @@

    @@ -12934,7 +17803,7 @@ index c9ab25ceb..09b755f1a 100644 ovn-ic populates this key if the route is learned from the global database. In this case the value -@@ -2713,18 +2751,34 @@ +@@ -2713,18 +2772,34 @@
  • @@ -12970,7 +17839,7 @@ index c9ab25ceb..09b755f1a 100644

    Marks the packet with the value specified when the router policy -@@ -3702,4 +3756,71 @@ +@@ -3702,4 +3777,71 @@

  • @@ -13353,11 +18222,11 @@ index c13994848..258a12b4e 100644 diff --git a/ovs b/ovs new file mode 160000 -index 000000000..ac09cbfcb +index 000000000..ac85cdb38 --- /dev/null +++ b/ovs @@ -0,0 +1 @@ -+Subproject commit ac09cbfcb70ac6f443f039d5934448bd80f74493 ++Subproject commit ac85cdb38c1f33e7952bc4c0347d6c7873fb56a1 diff --git a/tests/atlocal.in b/tests/atlocal.in index d9a4c91d4..5ebc8e117 100644 --- a/tests/atlocal.in @@ -13373,26 +18242,32 @@ index d9a4c91d4..5ebc8e117 100644 unset http_proxy unset https_proxy diff --git a/tests/automake.mk b/tests/automake.mk -index c5c286eae..c09f615d5 100644 +index c5c286eae..d60cb8105 100644 --- a/tests/automake.mk +++ b/tests/automake.mk -@@ -31,7 +31,8 @@ TESTSUITE_AT = \ +@@ -31,7 +31,9 @@ TESTSUITE_AT = \ tests/ovn-controller-vtep.at \ tests/ovn-ic.at \ tests/ovn-macros.at \ - tests/ovn-performance.at + tests/ovn-performance.at \ -+ tests/ovn-ofctrl-seqno.at ++ tests/ovn-ofctrl-seqno.at \ ++ tests/ovn-lflow-cache.at SYSTEM_KMOD_TESTSUITE_AT = \ tests/system-common-macros.at \ -@@ -202,7 +203,10 @@ noinst_PROGRAMS += tests/ovstest +@@ -202,7 +204,15 @@ noinst_PROGRAMS += tests/ovstest tests_ovstest_SOURCES = \ tests/ovstest.c \ tests/ovstest.h \ - tests/test-ovn.c ++ tests/test-utils.c \ ++ tests/test-utils.h \ + tests/test-ovn.c \ ++ controller/test-lflow-cache.c \ + controller/test-ofctrl-seqno.c \ ++ controller/lflow-cache.c \ ++ controller/lflow-cache.h \ + controller/ofctrl-seqno.c \ + controller/ofctrl-seqno.h @@ -13473,10 +18348,10 @@ index cb582811f..b2261d285 100644 OVS_WAIT_UNTIL([test -z "`ovn-sbctl list Chassis | grep -- br-vtep_lswitch`"]) OVS_WAIT_UNTIL([test -z "`vtep-ctl list physical_port p0`"]) diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at -index 1b4679963..f818f9cea 100644 +index 1b4679963..1f922f78f 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at -@@ -414,3 +414,20 @@ OVS_WAIT_UNTIL([ovs-vsctl get Bridge br-int external_ids:ovn-nb-cfg], [0], [1]) +@@ -414,3 
+414,100 @@ OVS_WAIT_UNTIL([ovs-vsctl get Bridge br-int external_ids:ovn-nb-cfg], [0], [1]) OVN_CLEANUP([hv1]) AT_CLEANUP @@ -13490,18 +18365,509 @@ index 1b4679963..f818f9cea 100644 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + -+# Wait for ovn-controller to register in the SB. -+OVS_WAIT_UNTIL([ -+ test "$(ovn-sbctl get chassis hv1 other_config:port-up-notif)" = '"true"' ++# Wait for ovn-controller to register in the SB. ++OVS_WAIT_UNTIL([ ++ test "$(ovn-sbctl get chassis hv1 other_config:port-up-notif)" = '"true"' ++]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ ++# Test that changes of a port binding from one type to another doesn't ++# result in any ovn-controller asserts or crashes. ++AT_SETUP([ovn-controller - port binding type change handling]) ++AT_KEYWORDS([ovn]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++check ovn-nbctl ls-add ls1 -- lsp-add ls1 lsp1 ++ ++as hv1 ++check ovs-vsctl \ ++ -- add-port br-int vif1 \ ++ -- set Interface vif1 external_ids:iface-id=lsp1 ++ ++# ovn-controller should bind the interface.
++wait_for_ports_up ++hv_uuid=$(fetch_column Chassis _uuid name=hv1) ++check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[1]] ++primary lport : [[lsp1]] ++---------------------------------------- ++]) ++ ++# pause ovn-northd ++check as northd ovn-appctl -t ovn-northd pause ++check as northd-backup ovn-appctl -t ovn-northd pause ++ ++as northd ovn-appctl -t ovn-northd status ++as northd-backup ovn-appctl -t ovn-northd status ++ ++pb_types=(patch chassisredirect l3gateway localnet localport l2gateway ++ virtual external remote vtep) ++for type in ${pb_types[[@]]} ++do ++ for update_type in ${pb_types[[@]]} ++ do ++ check ovn-sbctl set port_binding lsp1 type=$type ++ check as hv1 ovs-vsctl set open . external_ids:ovn-cms-options=$type ++ OVS_WAIT_UNTIL([test $type = $(ovn-sbctl get chassis . other_config:ovn-cms-options)]) ++ ++ AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[0]] ++---------------------------------------- ++]) ++ ++ echo "Updating to $update_type from $type" ++ check ovn-sbctl set port_binding lsp1 type=$update_type ++ check as hv1 ovs-vsctl set open . external_ids:ovn-cms-options=$update_type ++ OVS_WAIT_UNTIL([test $update_type = $(ovn-sbctl get chassis . other_config:ovn-cms-options)]) ++ ++ AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[0]] ++---------------------------------------- +]) ++ # Set the port binding type back to VIF. ++ check ovn-sbctl set port_binding lsp1 type=\"\" ++ check as hv1 ovs-vsctl set open . external_ids:ovn-cms-options=foo ++ OVS_WAIT_UNTIL([test foo = $(ovn-sbctl get chassis . 
other_config:ovn-cms-options)]) ++ ++ AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[1]] ++primary lport : [[lsp1]] ++---------------------------------------- ++]) ++ done ++done + +OVN_CLEANUP([hv1]) +AT_CLEANUP +diff --git a/tests/ovn-lflow-cache.at b/tests/ovn-lflow-cache.at +new file mode 100644 +index 000000000..e5e9ed1e8 +--- /dev/null ++++ b/tests/ovn-lflow-cache.at +@@ -0,0 +1,405 @@ ++# ++# Unit tests for the controller/lflow-cache.c module. ++# ++AT_BANNER([OVN unit tests - lflow-cache]) ++ ++AT_SETUP([ovn -- unit test -- lflow-cache single add/lookup]) ++AT_CHECK( ++ [ovstest test-lflow-cache lflow_cache_operations \ ++ true 3 \ ++ add conj-id 1 \ ++ add expr 2 \ ++ add matches 3 | grep -v 'Mem usage (KB)'], ++ [0], [dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 1 ++LOOKUP: ++ conj_id_ofs: 1 ++ type: conj-id ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 2 ++LOOKUP: ++ conj_id_ofs: 2 ++ type: expr ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 3 ++LOOKUP: ++ conj_id_ofs: 0 ++ type: matches ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 1 ++]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- unit test -- lflow-cache single add/lookup/del]) ++AT_CHECK( ++ [ovstest test-lflow-cache lflow_cache_operations \ ++ true 3 \ ++ add-del conj-id 1 \ ++ add-del expr 2 \ ++ add-del matches 3 | grep -v 'Mem usage (KB)'], ++ [0], [dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 1 ++LOOKUP: ++ conj_id_ofs: 1 ++ type: conj-id ++DELETE ++LOOKUP: ++ not found ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 2 ++LOOKUP: ++ conj_id_ofs: 2 ++ type: expr ++DELETE 
++LOOKUP: ++ not found ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 3 ++LOOKUP: ++ conj_id_ofs: 0 ++ type: matches ++DELETE ++LOOKUP: ++ not found ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- unit test -- lflow-cache disabled single add/lookup/del]) ++AT_CHECK( ++ [ovstest test-lflow-cache lflow_cache_operations \ ++ false 3 \ ++ add conj-id 1 \ ++ add expr 2 \ ++ add matches 3 | grep -v 'Mem usage (KB)'], ++ [0], [dnl ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 1 ++LOOKUP: ++ not found ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 2 ++LOOKUP: ++ not found ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 3 ++LOOKUP: ++ not found ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- unit test -- lflow-cache disable/enable/flush]) ++AT_CHECK( ++ [ovstest test-lflow-cache lflow_cache_operations \ ++ true 12 \ ++ add conj-id 1 \ ++ add expr 2 \ ++ add matches 3 \ ++ disable \ ++ add conj-id 4 \ ++ add expr 5 \ ++ add matches 6 \ ++ enable 1000 1024 \ ++ add conj-id 7 \ ++ add expr 8 \ ++ add matches 9 \ ++ flush | grep -v 'Mem usage (KB)'], ++ [0], [dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 1 ++LOOKUP: ++ conj_id_ofs: 1 ++ type: conj-id ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 2 ++LOOKUP: ++ conj_id_ofs: 2 ++ type: expr ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 3 ++LOOKUP: ++ conj_id_ofs: 0 ++ type: matches ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 1 ++DISABLE ++Enabled: false ++cache-conj-id : 0 
++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 4 ++LOOKUP: ++ not found ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 5 ++LOOKUP: ++ not found ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 6 ++LOOKUP: ++ not found ++Enabled: false ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ENABLE ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 7 ++LOOKUP: ++ conj_id_ofs: 7 ++ type: conj-id ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 8 ++LOOKUP: ++ conj_id_ofs: 8 ++ type: expr ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 9 ++LOOKUP: ++ conj_id_ofs: 0 ++ type: matches ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 1 ++FLUSH ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- unit test -- lflow-cache set limit]) ++AT_CHECK( ++ [ovstest test-lflow-cache lflow_cache_operations \ ++ true 12 \ ++ add conj-id 1 \ ++ add expr 2 \ ++ add matches 3 \ ++ enable 1 1024 \ ++ add conj-id 4 \ ++ add expr 5 \ ++ add matches 6 \ ++ add conj-id 7 \ ++ enable 1 1 \ ++ add conj-id 8 \ ++ add expr 9 \ ++ add matches 10 | grep -v 'Mem usage (KB)'], ++ [0], [dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 1 ++LOOKUP: ++ conj_id_ofs: 1 ++ type: conj-id ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 2 ++LOOKUP: ++ conj_id_ofs: 2 ++ type: expr ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 3 ++LOOKUP: ++ conj_id_ofs: 0 ++ type: matches ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 1 ++cache-matches : 1 ++ENABLE ++dnl ++dnl Max 
capacity smaller than current usage, cache should be flushed. ++dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 4 ++LOOKUP: ++ conj_id_ofs: 4 ++ type: conj-id ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 5 ++LOOKUP: ++ conj_id_ofs: 5 ++ type: expr ++dnl ++dnl Cache is full but we can evict the conj-id entry because we're adding ++dnl an expr one. ++dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 1 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 6 ++LOOKUP: ++ conj_id_ofs: 0 ++ type: matches ++dnl ++dnl Cache is full but we can evict the expr entry because we're adding ++dnl a matches one. ++dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 1 ++ADD conj-id: ++ conj-id-ofs: 7 ++LOOKUP: ++ not found ++dnl ++dnl Cache is full and we're adding a conj-id entry so we shouldn't evict ++dnl anything else. ++dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 1 ++ENABLE ++dnl ++dnl Max memory usage smaller than current memory usage, cache should be ++dnl flushed. ++dnl ++Enabled: true ++cache-conj-id : 0 ++cache-expr : 0 ++cache-matches : 0 ++ADD conj-id: ++ conj-id-ofs: 8 ++LOOKUP: ++ conj_id_ofs: 8 ++ type: conj-id ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD expr: ++ conj-id-ofs: 9 ++LOOKUP: ++ not found ++dnl ++dnl Cache is full and we're adding a cache entry that would go over the max ++dnl memory limit so adding should fail. ++dnl ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++ADD matches: ++ conj-id-ofs: 10 ++LOOKUP: ++ not found ++dnl ++dnl Cache is full and we're adding a cache entry that would go over the max ++dnl memory limit so adding should fail. 
++dnl ++Enabled: true ++cache-conj-id : 1 ++cache-expr : 0 ++cache-matches : 0 ++]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- unit test -- lflow-cache negative tests]) ++AT_CHECK([ovstest test-lflow-cache lflow_cache_negative], [0], []) ++AT_CLEANUP diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at -index 59e500c57..2ba29a960 100644 +index 59e500c57..4cf14b1f2 100644 --- a/tests/ovn-macros.at +++ b/tests/ovn-macros.at -@@ -417,6 +417,22 @@ wait_column() { +@@ -417,6 +417,40 @@ wait_column() { echo "$column in $db table $table has value $found, from the following rows:" ovn-${db}ctl list $table]) } @@ -13521,14 +18887,32 @@ index 59e500c57..2ba29a960 100644 + done + fi +} ++ ++# reset_pcap_file iface pcap_file ++# Resets the pcap file associates with OVS interface. should be used ++# with dummy datapath. ++reset_iface_pcap_file() { ++ local iface=$1 ++ local pcap_file=$2 ++ check rm -f dummy-*.pcap ++ check ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \ ++options:rxq_pcap=dummy-rx.pcap ++ OVS_WAIT_WHILE([test 24 = $(wc -c dummy-tx.pcap | cut -d " " -f1)]) ++ check rm -f ${pcap_file}*.pcap ++ check ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \ ++options:rxq_pcap=${pcap_file}-rx.pcap ++ ++ OVS_WAIT_WHILE([test 24 = $(wc -c ${pcap_file}-tx.pcap | cut -d " " -f1)]) ++} ++ OVS_END_SHELL_HELPERS m4_define([OVN_POPULATE_ARP], [AT_CHECK(ovn_populate_arp__, [0], [ignore])]) diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at -index 01edfcbc1..6d91aa4c5 100644 +index 01edfcbc1..8af55161f 100644 --- a/tests/ovn-nbctl.at +++ b/tests/ovn-nbctl.at -@@ -1539,34 +1539,34 @@ AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl +@@ -1539,34 +1539,35 @@ AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl dnl Add ecmp routes AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.0.0/24 11.0.0.1]) AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 10.0.0.0/24 11.0.0.2]) @@ -13547,10 +18931,11 @@ index 01edfcbc1..6d91aa4c5 100644 + 10.0.0.0/24 11.0.0.2 dst-ip ecmp + 
10.0.0.0/24 11.0.0.3 dst-ip ecmp + 10.0.0.0/24 11.0.0.4 dst-ip lp0 ecmp -+]) + ]) +AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 10.0.0.0/24 11.0.0.2], [1], [], + [ovn-nbctl: duplicate nexthop for the same ECMP route - ]) ++]) ++AT_CHECK([ovn-nbctl --may-exist --ecmp lr-route-add lr0 10.0.0.0/24 11.0.0.2]) dnl Delete ecmp routes AT_CHECK([ovn-nbctl lr-route-del lr0 10.0.0.0/24 11.0.0.1]) @@ -13577,7 +18962,7 @@ index 01edfcbc1..6d91aa4c5 100644 AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl IPv4 Routes 10.0.0.0/24 11.0.0.3 dst-ip -@@ -1605,7 +1605,15 @@ AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.1.1/24 11.0.1.1 lp0]) +@@ -1605,7 +1606,16 @@ AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.1.1/24 11.0.1.1 lp0]) AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.0.1/24 11.0.0.1]) AT_CHECK([ovn-nbctl lr-route-add lr0 0:0:0:0:0:0:0:0/0 2001:0db8:0:f101::1]) AT_CHECK([ovn-nbctl lr-route-add lr0 2001:0db8:0::/64 2001:0db8:0:f102::1 lp0]) @@ -13591,10 +18976,11 @@ index 01edfcbc1..6d91aa4c5 100644 +AT_CHECK([ovn-nbctl --ecmp-symmetric-reply lr-route-add lr0 2003:0db8:1::/64 2001:0db8:0:f103::6], [1], [], + [ovn-nbctl: duplicate nexthop for the same ECMP route +]) ++AT_CHECK([ovn-nbctl --may-exist --ecmp-symmetric-reply lr-route-add lr0 2003:0db8:1::/64 2001:0db8:0:f103::6]) AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl IPv4 Routes -@@ -1615,9 +1623,20 @@ IPv4 Routes +@@ -1615,9 +1625,20 @@ IPv4 Routes IPv6 Routes 2001:db8::/64 2001:db8:0:f102::1 dst-ip lp0 @@ -13618,7 +19004,7 @@ index 01edfcbc1..6d91aa4c5 100644 dnl --------------------------------------------------------------------- diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index 90ca0a4db..11d4a9c86 100644 +index 90ca0a4db..7a0dcaec0 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -605,11 +605,12 @@ wait_row_count Port_Binding 0 logical_port=sw0-pext1 'chassis!=[[]]' @@ -13770,11 +19156,13 @@ index 90ca0a4db..11d4a9c86 100644 ovn-nbctl ls-add sw0 ovn-nbctl --wait=sb lsp-add sw0 sw0-p1 -- 
lsp-set-addresses sw0-p1 \ "00:00:00:00:00:03 10.0.0.3" -@@ -1072,54 +1078,57 @@ check ovn-nbctl --wait=sb ls-lb-add sw0 lb1 +@@ -1071,59 +1077,62 @@ check ovn-nbctl --wait=sb ls-lb-add sw0 lb1 + AT_CAPTURE_FILE([sbflows]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows | grep 'priority=120.*ct_lb' | sed 's/table=..//'], 0, [dnl +- [ovn-sbctl dump-flows sw0 | tee sbflows | grep 'priority=120.*ct_lb' | sed 's/table=..//'], 0, [dnl - (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ++ [ovn-sbctl dump-flows sw0 | tee sbflows | grep 'priority=120.*backends' | sed 's/table=..//'], 0, [dnl + (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) @@ -13785,8 +19173,9 @@ index 90ca0a4db..11d4a9c86 100644 AT_CAPTURE_FILE([sbflows2]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows2 | grep 'priority=120.*ct_lb' | sed 's/table=..//'], [0], +- [ovn-sbctl dump-flows sw0 | tee sbflows2 | grep 'priority=120.*ct_lb' | sed 's/table=..//'], [0], -[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ++ [ovn-sbctl dump-flows sw0 | tee sbflows2 | grep 'priority=120.*backends' | sed 's/table=..//'], [0], +[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) @@ -13798,7 +19187,8 @@ index 90ca0a4db..11d4a9c86 100644 wait_row_count Service_Monitor 2 +check ovn-nbctl --wait=sb sync - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt +-ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt ++ovn-sbctl dump-flows sw0 | grep backends | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt | sed 
's/table=..//'], [0], [dnl - (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) + (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) @@ -13811,8 +19201,9 @@ index 90ca0a4db..11d4a9c86 100644 AT_CAPTURE_FILE([sbflows3]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows 3 | grep 'priority=120.*ct_lb' | sed 's/table=..//'], [0], +- [ovn-sbctl dump-flows sw0 | tee sbflows 3 | grep 'priority=120.*ct_lb' | sed 's/table=..//'], [0], -[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ++ [ovn-sbctl dump-flows sw0 | tee sbflows 3 | grep 'priority=120.*backends' | sed 's/table=..//'], [0], +[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) @@ -13824,8 +19215,9 @@ index 90ca0a4db..11d4a9c86 100644 AT_CAPTURE_FILE([sbflows4]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows4 | grep 'priority=120.*ct_lb' | sed 's/table=..//'], [0], +- [ovn-sbctl dump-flows sw0 | tee sbflows4 | grep 'priority=120.*ct_lb' | sed 's/table=..//'], [0], -[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) ++ [ovn-sbctl dump-flows sw0 | tee sbflows4 | grep 'priority=120.*backends' | sed 's/table=..//'], [0], +[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80);) ]) @@ -13838,6 +19230,11 @@ index 90ca0a4db..11d4a9c86 100644 AT_CAPTURE_FILE([sbflows5]) OVS_WAIT_FOR_OUTPUT( +- [ovn-sbctl dump-flows sw0 | tee sbflows5 | grep 'priority=120.*ct_lb'], 1) 
++ [ovn-sbctl dump-flows sw0 | tee sbflows5 | grep 'priority=120.*backends'], 1) + + AT_CAPTURE_FILE([sbflows6]) + OVS_WAIT_FOR_OUTPUT( @@ -1131,32 +1140,34 @@ OVS_WAIT_FOR_OUTPUT( (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) ]) @@ -13852,8 +19249,9 @@ index 90ca0a4db..11d4a9c86 100644 AT_CAPTURE_FILE([sbflows7]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows7 | grep ct_lb | grep priority=120 | sed 's/table=..//'], 0, +- [ovn-sbctl dump-flows sw0 | tee sbflows7 | grep ct_lb | grep priority=120 | sed 's/table=..//'], 0, -[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ++ [ovn-sbctl dump-flows sw0 | tee sbflows7 | grep backends | grep priority=120 | sed 's/table=..//'], 0, +[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) @@ -13879,9 +19277,12 @@ index 90ca0a4db..11d4a9c86 100644 AT_CHECK( [ovn-nbctl --wait=sb \ -- --id=@hc create Load_Balancer_Health_Check vip=10.0.0.40\\:1000 \ -@@ -1176,34 +1187,35 @@ AT_CAPTURE_FILE([sbflows9]) +@@ -1174,36 +1185,37 @@ wait_row_count Service_Monitor 1 port=1000 + + AT_CAPTURE_FILE([sbflows9]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows9 | grep ct_lb | grep priority=120 | sed 's/table=..//' | sort], +- [ovn-sbctl dump-flows sw0 | tee sbflows9 | grep ct_lb | grep priority=120 | sed 's/table=..//' | sort], ++ [ovn-sbctl dump-flows sw0 | tee sbflows9 | grep backends | grep priority=120 | sed 's/table=..//' | sort], 0, -[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) - (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000);) @@ -13898,7 +19299,8 @@ index 
90ca0a4db..11d4a9c86 100644 AT_CAPTURE_FILE([sbflows10]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw0 | tee sbflows10 | grep ct_lb | grep priority=120 | sed 's/table=..//' | sort], +- [ovn-sbctl dump-flows sw0 | tee sbflows10 | grep ct_lb | grep priority=120 | sed 's/table=..//' | sort], ++ [ovn-sbctl dump-flows sw0 | tee sbflows10 | grep backends | grep priority=120 | sed 's/table=..//' | sort], 0, -[ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) @@ -13911,7 +19313,8 @@ index 90ca0a4db..11d4a9c86 100644 check ovn-nbctl --wait=sb ls-lb-add sw1 lb1 AT_CAPTURE_FILE([sbflows11]) OVS_WAIT_FOR_OUTPUT( - [ovn-sbctl dump-flows sw1 | tee sbflows11 | grep ct_lb | grep priority=120 | sed 's/table=..//' | sort], +- [ovn-sbctl dump-flows sw1 | tee sbflows11 | grep ct_lb | grep priority=120 | sed 's/table=..//' | sort], ++ [ovn-sbctl dump-flows sw1 | tee sbflows11 | grep backends | grep priority=120 | sed 's/table=..//' | sort], 0, [dnl - (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) @@ -13970,13 +19373,202 @@ index 90ca0a4db..11d4a9c86 100644 +AT_CAPTURE_FILE([sbflows12]) +OVS_WAIT_FOR_OUTPUT( + [ovn-sbctl dump-flows sw0 | tee sbflows12 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" | grep priority=120 | sed 's/table=..//'], [0], [dnl -+ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0 = 0; reject { outport <-> inport; next(pipeline=egress,table=6);};) ++ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), 
action=(reg0 = 0; reject { outport <-> inport; next(pipeline=egress,table=5);};) +]) + AT_CLEANUP AT_SETUP([ovn -- Load balancer VIP in NAT entries]) -@@ -1704,7 +1743,7 @@ check ovn-nbctl pg-add pg0 sw0-p1 sw1-p1 +@@ -1465,6 +1504,19 @@ ovn-nbctl lr-nat-add lr dnat_and_snat 43.43.43.4 42.42.42.4 ls-vm 00:00:00:00:00 + ovn-nbctl lr-nat-add lr snat 43.43.43.150 43.43.43.50 + ovn-nbctl lr-nat-add lr snat 43.43.43.150 43.43.43.51 + ++ovn-nbctl lb-add lb1 "192.168.2.1:8080" "10.0.0.4:8080" ++ovn-nbctl lb-add lb2 "192.168.2.4:8080" "10.0.0.5:8080" udp ++ovn-nbctl lb-add lb3 "192.168.2.5:8080" "10.0.0.6:8080" ++ovn-nbctl lb-add lb4 "192.168.2.6:8080" "10.0.0.7:8080" ++ovn-nbctl lb-add lb5 "fe80::200:ff:fe00:101:8080" "fe02::200:ff:fe00:101:8080" ++ovn-nbctl lb-add lb5 "fe80::200:ff:fe00:102:8080" "fe02::200:ff:fe00:102:8080" ++ ++ovn-nbctl lr-lb-add lr lb1 ++ovn-nbctl lr-lb-add lr lb2 ++ovn-nbctl lr-lb-add lr lb3 ++ovn-nbctl lr-lb-add lr lb4 ++ovn-nbctl lr-lb-add lr lb5 ++ + ovn-nbctl --wait=sb sync + + # Ingress router port ETH address is stored in lr_in_admission. 
+@@ -1487,28 +1539,46 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;) + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.2), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.3), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.3; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.4), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.4; outport = 
inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 42.42.42.1; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp" && arp.op == 1 && arp.tpa == { 192.168.2.1, 192.168.2.4, 192.168.2.5, 192.168.2.6 }), dnl ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp" && ip6.dst == {fe80::200:ff:fe00:1, ff02::1:ff00:1} && nd_ns && nd.target == fe80::200:ff:fe00:1), dnl + action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:1; nd.target = fe80::200:ff:fe00:1; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) + table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp" && nd_ns && nd.target == fe80::200:ff:fe00:101:8080), dnl ++action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:101:8080; nd.target = fe80::200:ff:fe00:101:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp" && nd_ns && nd.target == fe80::200:ff:fe00:102:8080), dnl ++action=(nd_na { eth.src 
= xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:102:8080; nd.target = fe80::200:ff:fe00:102:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.1; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == { 192.168.2.1, 192.168.2.4, 192.168.2.5, 192.168.2.6 }), dnl ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100), dnl + action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:100; nd.target = fe80::200:ff:fe00:100; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp-public" && nd_ns && nd.target == fe80::200:ff:fe00:101:8080), dnl ++action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:101:8080; nd.target = fe80::200:ff:fe00:101:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp-public" && nd_ns && nd.target == fe80::200:ff:fe00:102:8080), dnl ++action=(nd_na { eth.src = 
xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:102:8080; nd.target = fe80::200:ff:fe00:102:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) + ]) + + # xreg0[0..47] isn't used anywhere else. +@@ -1544,28 +1614,46 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;) + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.2), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(arp.op == 1 && arp.tpa == 43.43.43.3), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.3; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + 
match=(arp.op == 1 && arp.tpa == 43.43.43.4), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.4; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 42.42.42.1; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp" && arp.op == 1 && arp.tpa == { 192.168.2.1, 192.168.2.4, 192.168.2.5, 192.168.2.6 }), dnl ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp" && ip6.dst == {fe80::200:ff:fe00:1, ff02::1:ff00:1} && nd_ns && nd.target == fe80::200:ff:fe00:1), dnl + action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:1; nd.target = fe80::200:ff:fe00:1; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) + table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp" && nd_ns && nd.target == fe80::200:ff:fe00:101:8080), dnl ++action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:101:8080; nd.target = fe80::200:ff:fe00:101:8080; 
nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp" && nd_ns && nd.target == fe80::200:ff:fe00:102:8080), dnl ++action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:102:8080; nd.target = fe80::200:ff:fe00:102:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.1; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == { 192.168.2.1, 192.168.2.4, 192.168.2.5, 192.168.2.6 } && is_chassis_resident("cr-lrp-public")), dnl ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=90 , dnl + match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100 && is_chassis_resident("cr-lrp-public")), dnl + action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:100; nd.target = fe80::200:ff:fe00:100; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp-public" && nd_ns && nd.target == fe80::200:ff:fe00:101:8080 && is_chassis_resident("cr-lrp-public")), dnl ++action=(nd_na { 
eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:101:8080; nd.target = fe80::200:ff:fe00:101:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ++ table=3 (lr_in_ip_input ), priority=90 , dnl ++match=(inport == "lrp-public" && nd_ns && nd.target == fe80::200:ff:fe00:102:8080 && is_chassis_resident("cr-lrp-public")), dnl ++action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:102:8080; nd.target = fe80::200:ff:fe00:102:8080; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) + ]) + + # Priority 91 drop flows (per distributed gw port), if port is not resident. +@@ -1587,16 +1675,16 @@ action=(drop;) + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=92" | grep "arp\|nd" | sort], [0], [dnl + table=3 (lr_in_ip_input ), priority=92 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150 && is_chassis_resident("cr-lrp-public")), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=92 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2 && is_chassis_resident("cr-lrp-public")), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=92 , dnl + 
match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.3 && is_chassis_resident("cr-lrp-public")), dnl +-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.3; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=92 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.4 && is_chassis_resident("ls-vm")), dnl +-action=(eth.dst = eth.src; eth.src = 00:00:00:00:00:02; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = 00:00:00:00:00:02; arp.tpa = arp.spa; arp.spa = 43.43.43.4; outport = inport; flags.loopback = 1; output;) ++action=(eth.dst = eth.src; eth.src = 00:00:00:00:00:02; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = 00:00:00:00:00:02; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) + ]) + + # xreg0[0..47] isn't used anywhere else. 
+@@ -1632,13 +1720,13 @@ AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 + ovn-nbctl ls-lb-add sw0 lb1 + ovn-nbctl --wait=sb sync + AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl +- table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) ++ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + ]) + + ovn-nbctl ls-lb-add sw0 lb2 + ovn-nbctl --wait=sb sync + AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl +- table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) ++ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + ]) + + lb1_uuid=$(ovn-nbctl --bare --columns _uuid find load_balancer name=lb1) +@@ -1647,7 +1735,7 @@ lb2_uuid=$(ovn-nbctl --bare --columns _uuid find load_balancer name=lb2) + ovn-nbctl clear load_balancer $lb1_uuid vips + ovn-nbctl --wait=sb sync + AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl +- table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) ++ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + ]) + + ovn-nbctl clear load_balancer $lb2_uuid vips +@@ -1660,14 +1748,14 @@ ovn-nbctl set load_balancer $lb2_uuid vips:"10.0.0.11"="10.0.0.4" + + ovn-nbctl --wait=sb sync + AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl +- table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) ++ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + ]) + + # Now reverse the order of clearing the vip. 
+ ovn-nbctl clear load_balancer $lb2_uuid vips + ovn-nbctl --wait=sb sync + AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl +- table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) ++ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + ]) + + ovn-nbctl clear load_balancer $lb1_uuid vips +@@ -1704,7 +1792,7 @@ check ovn-nbctl pg-add pg0 sw0-p1 sw1-p1 check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4 && tcp && tcp.dst == 80" reject check ovn-nbctl acl-add pg0 to-lport 1003 "outport == @pg0 && ip6 && udp" reject @@ -13985,7 +19577,7 @@ index 90ca0a4db..11d4a9c86 100644 AS_BOX([1]) -@@ -1713,28 +1752,12 @@ AT_CAPTURE_FILE([sw0flows]) +@@ -1713,28 +1801,12 @@ AT_CAPTURE_FILE([sw0flows]) ovn-sbctl dump-flows sw1 > sw1flows AT_CAPTURE_FILE([sw1flows]) @@ -14013,14 +19605,14 @@ index 90ca0a4db..11d4a9c86 100644 -action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=21); };) +AT_CHECK( + [grep -E 'ls_(in|out)_acl' sw0flows sw1flows | grep pg0 | sort], [0], [dnl -+sw0flows: table=5 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw0flows: table=9 (ls_in_acl ), priority=2002 , match=(inport == @pg0 && ip4 && tcp && tcp.dst == 80), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=egress,table=6); };) -+sw1flows: table=5 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. 
*/ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows: table=9 (ls_in_acl ), priority=2002 , match=(inport == @pg0 && ip4 && tcp && tcp.dst == 80), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=egress,table=6); };) ++sw0flows: table=4 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw0flows: table=9 (ls_in_acl ), priority=2002 , match=(inport == @pg0 && ip4 && tcp && tcp.dst == 80), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=egress,table=5); };) ++sw1flows: table=4 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows: table=9 (ls_in_acl ), priority=2002 , match=(inport == @pg0 && ip4 && tcp && tcp.dst == 80), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=egress,table=5); };) ]) AS_BOX([2]) -@@ -1746,22 +1769,11 @@ AT_CAPTURE_FILE([sw0flows2]) +@@ -1746,22 +1818,11 @@ AT_CAPTURE_FILE([sw0flows2]) ovn-sbctl dump-flows sw1 > sw1flows2 AT_CAPTURE_FILE([sw1flows2]) @@ -14041,14 +19633,14 @@ index 90ca0a4db..11d4a9c86 100644 -match=(outport == @pg0 && ip6 && udp), dnl -action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=21); };) +AT_CHECK([grep "ls_out_acl" sw0flows2 sw1flows2 | grep pg0 | sort], [0], [dnl -+sw0flows2: table=5 (ls_out_acl ), priority=2002 , match=(outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. 
*/ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw0flows2: table=5 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows2: table=5 (ls_out_acl ), priority=2002 , match=(outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows2: table=5 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) ++sw0flows2: table=4 (ls_out_acl ), priority=2002 , match=(outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw0flows2: table=4 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows2: table=4 (ls_out_acl ), priority=2002 , match=(outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows2: table=4 (ls_out_acl ), priority=2003 , match=(outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. 
*/ outport <-> inport; next(pipeline=ingress,table=22); };) ]) AS_BOX([3]) -@@ -1773,42 +1785,19 @@ AT_CAPTURE_FILE([sw0flows3]) +@@ -1773,42 +1834,19 @@ AT_CAPTURE_FILE([sw0flows3]) ovn-sbctl dump-flows sw1 > sw1flows3 AT_CAPTURE_FILE([sw1flows3]) @@ -14089,22 +19681,22 @@ index 90ca0a4db..11d4a9c86 100644 -match=((reg0[[9]] == 1) && outport == @pg0 && ip6 && udp), dnl -action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=21); };) +AT_CHECK([grep "ls_out_acl" sw0flows3 sw1flows3 | grep pg0 | sort], [0], [dnl -+sw0flows3: table=5 (ls_out_acl ), priority=2001 , match=(reg0[[7]] == 1 && (outport == @pg0 && ip)), action=(reg0[[1]] = 1; next;) -+sw0flows3: table=5 (ls_out_acl ), priority=2001 , match=(reg0[[8]] == 1 && (outport == @pg0 && ip)), action=(next;) -+sw0flows3: table=5 (ls_out_acl ), priority=2002 , match=((reg0[[10]] == 1) && outport == @pg0 && ip4 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw0flows3: table=5 (ls_out_acl ), priority=2002 , match=((reg0[[9]] == 1) && outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw0flows3: table=5 (ls_out_acl ), priority=2003 , match=((reg0[[10]] == 1) && outport == @pg0 && ip6 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw0flows3: table=5 (ls_out_acl ), priority=2003 , match=((reg0[[9]] == 1) && outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. 
*/ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows3: table=5 (ls_out_acl ), priority=2001 , match=(reg0[[7]] == 1 && (outport == @pg0 && ip)), action=(reg0[[1]] = 1; next;) -+sw1flows3: table=5 (ls_out_acl ), priority=2001 , match=(reg0[[8]] == 1 && (outport == @pg0 && ip)), action=(next;) -+sw1flows3: table=5 (ls_out_acl ), priority=2002 , match=((reg0[[10]] == 1) && outport == @pg0 && ip4 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows3: table=5 (ls_out_acl ), priority=2002 , match=((reg0[[9]] == 1) && outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows3: table=5 (ls_out_acl ), priority=2003 , match=((reg0[[10]] == 1) && outport == @pg0 && ip6 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) -+sw1flows3: table=5 (ls_out_acl ), priority=2003 , match=((reg0[[9]] == 1) && outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=23); };) ++sw0flows3: table=4 (ls_out_acl ), priority=2001 , match=(reg0[[7]] == 1 && (outport == @pg0 && ip)), action=(reg0[[1]] = 1; next;) ++sw0flows3: table=4 (ls_out_acl ), priority=2001 , match=(reg0[[8]] == 1 && (outport == @pg0 && ip)), action=(next;) ++sw0flows3: table=4 (ls_out_acl ), priority=2002 , match=((reg0[[10]] == 1) && outport == @pg0 && ip4 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. 
*/ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw0flows3: table=4 (ls_out_acl ), priority=2002 , match=((reg0[[9]] == 1) && outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw0flows3: table=4 (ls_out_acl ), priority=2003 , match=((reg0[[10]] == 1) && outport == @pg0 && ip6 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw0flows3: table=4 (ls_out_acl ), priority=2003 , match=((reg0[[9]] == 1) && outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows3: table=4 (ls_out_acl ), priority=2001 , match=(reg0[[7]] == 1 && (outport == @pg0 && ip)), action=(reg0[[1]] = 1; next;) ++sw1flows3: table=4 (ls_out_acl ), priority=2001 , match=(reg0[[8]] == 1 && (outport == @pg0 && ip)), action=(next;) ++sw1flows3: table=4 (ls_out_acl ), priority=2002 , match=((reg0[[10]] == 1) && outport == @pg0 && ip4 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows3: table=4 (ls_out_acl ), priority=2002 , match=((reg0[[9]] == 1) && outport == @pg0 && ip4 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows3: table=4 (ls_out_acl ), priority=2003 , match=((reg0[[10]] == 1) && outport == @pg0 && ip6 && udp), action=(ct_commit { ct_label.blocked = 1; }; reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. 
*/ outport <-> inport; next(pipeline=ingress,table=22); };) ++sw1flows3: table=4 (ls_out_acl ), priority=2003 , match=((reg0[[9]] == 1) && outport == @pg0 && ip6 && udp), action=(reg0 = 0; reject { /* eth.dst <-> eth.src; ip.dst <-> ip.src; is implicit. */ outport <-> inport; next(pipeline=ingress,table=22); };) ]) AT_CLEANUP -@@ -1818,20 +1807,25 @@ AT_KEYWORDS([acl log meter fair]) +@@ -1818,20 +1856,25 @@ AT_KEYWORDS([acl log meter fair]) ovn_start check ovn-nbctl ls-add sw0 @@ -14131,7 +19723,7 @@ index 90ca0a4db..11d4a9c86 100644 check ovn-nbctl --wait=sb sync check_row_count nb:meter 1 -@@ -1840,8 +1834,9 @@ check_column meter_me nb:meter name +@@ -1840,8 +1883,9 @@ check_column meter_me nb:meter name check_acl_lflow() { acl_log_name=$1 meter_name=$2 @@ -14142,7 +19734,7 @@ index 90ca0a4db..11d4a9c86 100644 grep "\"${acl_log_name}\"" | \ grep -c "meter=\"${meter_name}\""], [0], [1 ]) -@@ -1857,59 +1852,144 @@ check_meter_by_name() { +@@ -1857,57 +1901,165 @@ check_meter_by_name() { # Make sure 'fair' value properly affects the Meters in SB check_meter_by_name meter_me @@ -14217,8 +19809,8 @@ index 90ca0a4db..11d4a9c86 100644 +check_meter_by_name meter_me meter_me__${acl3} check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} - AT_CLEANUP - ++AT_CLEANUP ++ +AT_SETUP([ovn -- ACL skip hints for stateless config]) +AT_KEYWORDS([acl]) +ovn_start @@ -14238,17 +19830,17 @@ index 90ca0a4db..11d4a9c86 100644 + -- acl-add ls from-lport 2 "udp" allow-related \ + -- acl-add ls to-lport 2 "udp" allow-related +AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | grep 'ct\.' 
| sort], [0], [dnl -+ table=4 (ls_out_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) -+ table=5 (ls_out_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) -+ table=5 (ls_out_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) -+ table=5 (ls_out_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) -+ table=5 (ls_out_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=3 (ls_out_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint 
), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) ++ table=4 (ls_out_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) + table=8 (ls_in_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) @@ -14257,9 +19849,9 @@ index 90ca0a4db..11d4a9c86 100644 + table=8 (ls_in_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) + table=9 (ls_in_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) -+ table=9 (ls_in_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) -+ table=9 (ls_in_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) -+ table=9 (ls_in_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=9 (ls_in_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && 
!ct.inv && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) +]) + +AS_BOX([Check match ct_state with load balancer]) @@ -14269,18 +19861,25 @@ index 90ca0a4db..11d4a9c86 100644 + -- lb-add lb "10.0.0.1" "10.0.0.2" \ + -- ls-lb-add ls lb + -+AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | grep 'ct\.' | sort], [0], [dnl -+ table=4 (ls_out_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) -+ table=4 (ls_out_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) -+ table=5 (ls_out_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) -+ table=5 (ls_out_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) -+ table=5 (ls_out_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) -+ table=5 (ls_out_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) 
++AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | sort], [0], [dnl ++ table=3 (ls_out_acl_hint ), priority=0 , match=(1), action=(next;) ++ table=3 (ls_out_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=3 (ls_out_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl ), priority=0 , match=(1), action=(next;) ++ table=4 (ls_out_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) ++ table=4 (ls_out_acl ), priority=1001 , match=(reg0[[7]] == 1 && (ip)), action=(reg0[[1]] = 1; next;) ++ table=4 (ls_out_acl ), priority=1001 , match=(reg0[[8]] == 1 && (ip)), action=(next;) ++ table=4 (ls_out_acl ), priority=34000, match=(eth.src == $svc_monitor_mac), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=4 (ls_out_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 
|| mldv2), action=(next;) ++ table=8 (ls_in_acl_hint ), priority=0 , match=(1), action=(next;) + table=8 (ls_in_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) @@ -14288,18 +19887,32 @@ index 90ca0a4db..11d4a9c86 100644 + table=8 (ls_in_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) + table=8 (ls_in_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=9 (ls_in_acl ), priority=0 , match=(1), action=(next;) + table=9 (ls_in_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) -+ table=9 (ls_in_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) -+ table=9 (ls_in_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) -+ table=9 (ls_in_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=9 (ls_in_acl ), priority=1001 , match=(reg0[[7]] == 1 && (ip)), action=(reg0[[1]] = 1; next;) ++ table=9 (ls_in_acl ), priority=1001 , match=(reg0[[8]] == 1 && (ip)), action=(next;) ++ table=9 (ls_in_acl ), priority=34000, match=(eth.dst == $svc_monitor_mac), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), 
priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=9 (ls_in_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++ovn-nbctl --wait=sb clear logical_switch ls acls ++ovn-nbctl --wait=sb clear logical_switch ls load_balancer ++ ++AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | sort], [0], [dnl ++ table=3 (ls_out_acl_hint ), priority=65535, match=(1), action=(next;) ++ table=4 (ls_out_acl ), priority=65535, match=(1), action=(next;) ++ table=8 (ls_in_acl_hint ), priority=65535, match=(1), action=(next;) ++ table=9 (ls_in_acl ), priority=65535, match=(1), action=(next;) +]) + -+AT_CLEANUP + + AT_CLEANUP + AT_SETUP([datapath requested-tnl-key]) - AT_KEYWORDS([requested tnl tunnel key keys]) - ovn_start -@@ -2092,6 +2172,12 @@ echo +@@ -2092,6 +2244,12 @@ echo echo "__file__:__line__: check that datapath sw1 has lb0 and lb1 set in the load_balancers column." check_column "$lb0_uuid $lb1_uuid" sb:datapath_binding load_balancers external_ids:name=sw1 @@ -14312,7 +19925,7 @@ index 90ca0a4db..11d4a9c86 100644 echo echo "__file__:__line__: Delete load balancer lb1 an check that datapath sw1's load_balancers are updated accordingly." 
-@@ -2100,6 +2186,35 @@ check_column "$lb0_uuid" sb:datapath_binding load_balancers external_ids:name=sw +@@ -2100,6 +2258,35 @@ check_column "$lb0_uuid" sb:datapath_binding load_balancers external_ids:name=sw AT_CLEANUP @@ -14327,20 +19940,20 @@ index 90ca0a4db..11d4a9c86 100644 +check ovn-nbctl --wait=sb sync + +AT_CHECK([ovn-sbctl lflow-list sw0 | grep ls_in_pre_hairpin | sort], [0], [dnl -+ table=14(ls_in_pre_hairpin ), priority=0 , match=(1), action=(next;) -+ table=14(ls_in_pre_hairpin ), priority=100 , match=(ip && ct.trk), action=(reg0[[6]] = chk_lb_hairpin(); reg0[[12]] = chk_lb_hairpin_reply(); next;) ++ table=13(ls_in_pre_hairpin ), priority=0 , match=(1), action=(next;) ++ table=13(ls_in_pre_hairpin ), priority=100 , match=(ip && ct.trk), action=(reg0[[6]] = chk_lb_hairpin(); reg0[[12]] = chk_lb_hairpin_reply(); next;) +]) + +AT_CHECK([ovn-sbctl lflow-list sw0 | grep ls_in_nat_hairpin | sort], [0], [dnl -+ table=15(ls_in_nat_hairpin ), priority=0 , match=(1), action=(next;) -+ table=15(ls_in_nat_hairpin ), priority=100 , match=(ip && ct.est && ct.trk && reg0[[6]] == 1), action=(ct_snat;) -+ table=15(ls_in_nat_hairpin ), priority=100 , match=(ip && ct.new && ct.trk && reg0[[6]] == 1), action=(ct_snat_to_vip; next;) -+ table=15(ls_in_nat_hairpin ), priority=90 , match=(ip && reg0[[12]] == 1), action=(ct_snat;) ++ table=14(ls_in_nat_hairpin ), priority=0 , match=(1), action=(next;) ++ table=14(ls_in_nat_hairpin ), priority=100 , match=(ip && ct.est && ct.trk && reg0[[6]] == 1), action=(ct_snat;) ++ table=14(ls_in_nat_hairpin ), priority=100 , match=(ip && ct.new && ct.trk && reg0[[6]] == 1), action=(ct_snat_to_vip; next;) ++ table=14(ls_in_nat_hairpin ), priority=90 , match=(ip && reg0[[12]] == 1), action=(ct_snat;) +]) + +AT_CHECK([ovn-sbctl lflow-list sw0 | grep ls_in_hairpin | sort], [0], [dnl -+ table=16(ls_in_hairpin ), priority=0 , match=(1), action=(next;) -+ table=16(ls_in_hairpin ), priority=1 , match=((reg0[[6]] == 1 || reg0[[12]] == 1)), 
action=(eth.dst <-> eth.src; outport = inport; flags.loopback = 1; output;) ++ table=15(ls_in_hairpin ), priority=0 , match=(1), action=(next;) ++ table=15(ls_in_hairpin ), priority=1 , match=((reg0[[6]] == 1 || reg0[[12]] == 1)), action=(eth.dst <-> eth.src; outport = inport; flags.loopback = 1; output;) +]) + +AT_CLEANUP @@ -14348,7 +19961,7 @@ index 90ca0a4db..11d4a9c86 100644 AT_SETUP([ovn -- logical gatapath groups]) AT_KEYWORDS([use_logical_dp_groups]) ovn_start -@@ -2173,3 +2288,498 @@ dnl Number of common flows should be the same. +@@ -2173,3 +2360,745 @@ dnl Number of common flows should be the same. check_row_count Logical_Flow ${n_flows_common} logical_dp_group=${dp_group_uuid} AT_CLEANUP @@ -14388,6 +20001,13 @@ index 90ca0a4db..11d4a9c86 100644 + +check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.3" reroute 172.168.0.101,172.168.0.102 + ++ovn-nbctl lr-policy-list lr0 > policy-list ++AT_CAPTURE_FILE([policy-list]) ++AT_CHECK([cat policy-list], [0], [dnl ++Routing Policies ++ 10 ip4.src == 10.0.0.3 reroute 172.168.0.101, 172.168.0.102 ++]) ++ +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + @@ -14615,7 +20235,7 @@ index 90ca0a4db..11d4a9c86 100644 + +AT_CLEANUP + -+AT_SETUP([ovn -- lb_force_snat_ip for Gateway Routers]) ++AT_SETUP([ovn -- Load Balancers and lb_force_snat_ip for Gateway Routers]) +ovn_start + +check ovn-nbctl ls-add sw0 @@ -14653,11 +20273,11 @@ index 90ca0a4db..11d4a9c86 100644 + table=5 (lr_in_unsnat ), priority=0 , match=(1), action=(next;) +]) + -+AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl -+]) -+ -+ -+AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl ++ table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) ++ table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(ct_dnat;) ++ table=6 (lr_in_dnat ), 
priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(ct_lb(backends=10.0.0.4:8080);) ++ table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) +]) + +check ovn-nbctl --wait=sb set logical_router lr0 options:lb_force_snat_ip="20.0.0.4 aef0::4" @@ -14672,14 +20292,18 @@ index 90ca0a4db..11d4a9c86 100644 + table=5 (lr_in_unsnat ), priority=110 , match=(ip6 && ip6.dst == aef0::4), action=(ct_snat;) +]) + -+AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl ++ table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_dnat;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_lb(backends=10.0.0.4:8080);) ++ table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) +]) + -+AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl ++ table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) + table=1 (lr_out_snat ), priority=100 , match=(flags.force_snat_for_lb == 1 && ip4), action=(ct_snat(20.0.0.4);) + table=1 (lr_out_snat ), priority=100 , match=(flags.force_snat_for_lb == 1 && ip6), action=(ct_snat(aef0::4);) ++ table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + +check ovn-nbctl --wait=sb set logical_router lr0 options:lb_force_snat_ip="router_ip" @@ -14697,15 +20321,19 @@ index 90ca0a4db..11d4a9c86 100644 + table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip4.dst == 20.0.0.1), action=(ct_snat;) +]) + -+AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep 
"lr_in_dnat" lr0flows | sort], [0], [dnl ++ table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_dnat;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_lb(backends=10.0.0.4:8080);) ++ table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) +]) + -+AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl ++ table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-public"), action=(ct_snat(172.168.0.100);) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw0"), action=(ct_snat(10.0.0.1);) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw1"), action=(ct_snat(20.0.0.1);) ++ table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + +check ovn-nbctl --wait=sb remove logical_router lr0 options chassis @@ -14717,7 +20345,9 @@ index 90ca0a4db..11d4a9c86 100644 + table=5 (lr_in_unsnat ), priority=0 , match=(1), action=(next;) +]) + -+AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl ++ table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) ++ table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + +check ovn-nbctl set logical_router lr0 options:chassis=ch1 @@ -14734,16 +20364,43 @@ index 90ca0a4db..11d4a9c86 100644 + table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip6.dst == bef0::1), action=(ct_snat;) +]) + -+AT_CHECK([grep "lr_in_dnat" 
lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl ++ table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_dnat;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_lb(backends=10.0.0.4:8080);) ++ table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) +]) + -+AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl ++AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl ++ table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-public"), action=(ct_snat(172.168.0.100);) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw0"), action=(ct_snat(10.0.0.1);) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw1"), action=(ct_snat(20.0.0.1);) + table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip6 && outport == "lr0-sw1"), action=(ct_snat(bef0::1);) ++ table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) ++]) ++ ++check ovn-nbctl --wait=sb lb-add lb2 10.0.0.20:80 10.0.0.40:8080 ++check ovn-nbctl --wait=sb set load_balancer lb2 options:skip_snat=true ++check ovn-nbctl lr-lb-add lr0 lb2 ++check ovn-nbctl --wait=sb lb-del lb1 ++ovn-sbctl dump-flows lr0 > lr0flows ++ ++AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl ++ table=5 (lr_in_unsnat ), priority=0 , match=(1), action=(next;) ++ table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-public" && ip4.dst == 172.168.0.100), action=(ct_snat;) ++ table=5 (lr_in_unsnat ), 
priority=110 , match=(inport == "lr0-sw0" && ip4.dst == 10.0.0.1), action=(ct_snat;) ++ table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip4.dst == 20.0.0.1), action=(ct_snat;) ++ table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip6.dst == bef0::1), action=(ct_snat;) ++]) ++ ++AT_CHECK([grep "lr_in_dnat" lr0flows | grep skip_snat_for_lb | sort], [0], [dnl ++ table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.20 && tcp && tcp.dst == 80), action=(flags.skip_snat_for_lb = 1; ct_dnat;) ++ table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.20 && tcp && tcp.dst == 80), action=(flags.skip_snat_for_lb = 1; ct_lb(backends=10.0.0.40:8080);) ++]) ++ ++AT_CHECK([grep "lr_out_snat" lr0flows | grep skip_snat_for_lb | sort], [0], [dnl ++ table=1 (lr_out_snat ), priority=120 , match=(flags.skip_snat_for_lb == 1 && ip), action=(next;) +]) + +AT_CLEANUP @@ -14847,6 +20504,209 @@ index 90ca0a4db..11d4a9c86 100644 +check_column "" Port_Binding chassis logical_port=sw0-p1 + +AT_CLEANUP ++ ++AT_SETUP([ovn -- LS load balancer logical flows]) ++ovn_start ++ ++check ovn-nbctl \ ++ -- ls-add sw0 \ ++ -- lb-add lb0 10.0.0.10:80 10.0.0.4:8080 \ ++ -- ls-lb-add sw0 lb0 ++ ++check ovn-nbctl lr-add lr0 ++check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 ++check ovn-nbctl lsp-add sw0 sw0-lr0 ++check ovn-nbctl lsp-set-type sw0-lr0 router ++check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 ++check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 ++ ++check ovn-nbctl --wait=sb sync ++ ++check_stateful_flows() { ++ ovn-sbctl dump-flows sw0 > sw0flows ++ AT_CAPTURE_FILE([sw0flows]) ++ ++ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort], [0], [dnl ++ table=6 (ls_in_pre_lb ), priority=0 , match=(1), action=(next;) ++ table=6 (ls_in_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ++ table=6 (ls_in_pre_lb ), priority=110 , match=(eth.dst == $svc_monitor_mac), 
action=(next;) ++ table=6 (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) ++ table=6 (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) ++]) ++ ++ AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort], [0], [dnl ++ table=7 (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) ++ table=7 (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) ++ table=7 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && sctp), action=(reg1 = ip4.dst; reg2[[0..15]] = sctp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && tcp), action=(reg1 = ip4.dst; reg2[[0..15]] = tcp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && udp), action=(reg1 = ip4.dst; reg2[[0..15]] = udp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && sctp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = sctp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && tcp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = tcp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && udp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = udp.dst; ct_lb;) ++]) ++ ++ AT_CHECK([grep "ls_in_stateful" sw0flows | sort], [0], [dnl ++ table=12(ls_in_stateful ), priority=0 , match=(1), action=(next;) ++ table=12(ls_in_stateful ), priority=100 , match=(reg0[[1]] == 1), action=(ct_commit { ct_label.blocked = 0; }; next;) ++ table=12(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.4:8080);) ++]) ++ ++ AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl ++ table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) ++ 
table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ++ table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) ++ table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) ++ table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) ++]) ++ ++ AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl ++ table=2 (ls_out_pre_stateful), priority=0 , match=(1), action=(next;) ++ table=2 (ls_out_pre_stateful), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) ++ table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) ++]) ++ ++ AT_CHECK([grep "ls_out_lb" sw0flows | sort], [0], []) ++ ++ AT_CHECK([grep "ls_out_stateful" sw0flows | sort], [0], [dnl ++ table=7 (ls_out_stateful ), priority=0 , match=(1), action=(next;) ++ table=7 (ls_out_stateful ), priority=100 , match=(reg0[[1]] == 1), action=(ct_commit { ct_label.blocked = 0; }; next;) ++]) ++} ++ ++check_stateful_flows ++ ++# Add few ACLs ++check ovn-nbctl --wait=sb acl-add sw0 from-lport 1002 "ip4 && tcp && tcp.dst == 80" allow-related ++check ovn-nbctl --wait=sb acl-add sw0 to-lport 1002 "ip4 && tcp && tcp.src == 80" drop ++ ++check_stateful_flows ++ ++# Remove load balancer from sw0 ++check ovn-nbctl --wait=sb ls-lb-del sw0 lb0 ++ ++ovn-sbctl dump-flows sw0 > sw0flows ++AT_CAPTURE_FILE([sw0flows]) ++ ++AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort], [0], [dnl ++ table=6 (ls_in_pre_lb ), priority=0 , match=(1), action=(next;) ++ table=6 (ls_in_pre_lb ), priority=110 , match=(eth.dst == $svc_monitor_mac), action=(next;) ++ table=6 (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) ++ table=6 (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort], [0], [dnl ++ table=7 (ls_in_pre_stateful ), 
priority=0 , match=(1), action=(next;) ++ table=7 (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) ++ table=7 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && sctp), action=(reg1 = ip4.dst; reg2[[0..15]] = sctp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && tcp), action=(reg1 = ip4.dst; reg2[[0..15]] = tcp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && udp), action=(reg1 = ip4.dst; reg2[[0..15]] = udp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && sctp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = sctp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && tcp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = tcp.dst; ct_lb;) ++ table=7 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && udp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = udp.dst; ct_lb;) ++]) ++ ++AT_CHECK([grep "ls_in_stateful" sw0flows | sort], [0], [dnl ++ table=12(ls_in_stateful ), priority=0 , match=(1), action=(next;) ++ table=12(ls_in_stateful ), priority=100 , match=(reg0[[1]] == 1), action=(ct_commit { ct_label.blocked = 0; }; next;) ++]) ++ ++AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl ++ table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) ++ table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) ++ table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) ++ table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl ++ table=2 (ls_out_pre_stateful), priority=0 , match=(1), action=(next;) ++ table=2 (ls_out_pre_stateful), priority=100 , 
match=(reg0[[0]] == 1), action=(ct_next;) ++ table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) ++]) ++ ++AT_CHECK([grep "ls_out_stateful" sw0flows | sort], [0], [dnl ++ table=7 (ls_out_stateful ), priority=0 , match=(1), action=(next;) ++ table=7 (ls_out_stateful ), priority=100 , match=(reg0[[1]] == 1), action=(ct_commit { ct_label.blocked = 0; }; next;) ++]) ++ ++AT_CLEANUP ++]) ++ ++AT_SETUP([ovn -- ct.inv usage]) ++ovn_start ++ ++check ovn-nbctl ls-add sw0 ++check ovn-nbctl lsp-add sw0 sw0p1 ++ ++check ovn-nbctl --wait=sb acl-add sw0 to-lport 1002 ip allow-related ++ ++ovn-sbctl dump-flows sw0 > sw0flows ++AT_CAPTURE_FILE([sw0flows]) ++ ++AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 6553 | sort], [0], [dnl ++ table=9 (ls_in_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=9 (ls_in_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 6553 | sort], [0], [dnl ++ table=4 (ls_out_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=4 (ls_out_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++# Disable ct.inv usage. ++check ovn-nbctl --wait=sb set NB_Global . 
options:use_ct_inv_match=false ++ ++ovn-sbctl dump-flows sw0 > sw0flows ++AT_CAPTURE_FILE([sw0flows]) ++ ++AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 6553 | sort], [0], [dnl ++ table=9 (ls_in_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=((ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 6553 | sort], [0], [dnl ++ table=4 (ls_out_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=((ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep -c "ct.inv" sw0flows], [1], [dnl ++0 ++]) ++ ++# Enable ct.inv usage. ++check ovn-nbctl --wait=sb set NB_Global . 
options:use_ct_inv_match=true ++ ++ovn-sbctl dump-flows sw0 > sw0flows ++AT_CAPTURE_FILE([sw0flows]) ++ ++AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 6553 | sort], [0], [dnl ++ table=9 (ls_in_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=9 (ls_in_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=9 (ls_in_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 6553 | sort], [0], [dnl ++ table=4 (ls_out_acl ), priority=65532, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=4 (ls_out_acl ), priority=65532, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=4 (ls_out_acl ), priority=65532, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;) ++]) ++ ++AT_CHECK([grep -c "ct.inv" sw0flows], [0], [dnl ++6 ++]) ++ ++AT_CLEANUP diff --git a/tests/ovn-ofctrl-seqno.at b/tests/ovn-ofctrl-seqno.at new file mode 100644 index 000000000..59dfea947 @@ -15239,10 +21099,22 @@ index 6cc5b2174..e510c6cef 100644 for i in 1 2; do diff --git a/tests/ovn.at b/tests/ovn.at -index 2e0bc9c53..bd59c0a77 100644 +index 2e0bc9c53..0d3a1d1cb 100644 --- a/tests/ovn.at +++ b/tests/ovn.at -@@ -1637,6 +1637,17 @@ tcp_reset { }; +@@ -693,6 +693,11 @@ ip,nw_src=4.0.0.0/4.0.0.0 + ip,nw_src=64.0.0.0/64.0.0.0 + ip,nw_src=8.0.0.0/8.0.0.0 + ]) ++AT_CHECK([expr_to_flow 'ip4.dst == 172.27.0.65 && ip4.src == $set1 && ip4.dst != 10.128.0.0/14'], [0], [dnl ++ip,nw_src=10.0.0.1,nw_dst=172.27.0.65 
++ip,nw_src=10.0.0.2,nw_dst=172.27.0.65 ++ip,nw_src=10.0.0.3,nw_dst=172.27.0.65 ++]) + AT_CLEANUP + + AT_SETUP([ovn -- converting expressions to flows -- port groups]) +@@ -1637,6 +1642,17 @@ tcp_reset { }; encodes as controller(userdata=00.00.00.0b.00.00.00.00) has prereqs tcp @@ -15260,7 +21132,7 @@ index 2e0bc9c53..bd59c0a77 100644 # reject reject { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output; encodes as controller(userdata=00.00.00.16.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) -@@ -1807,6 +1818,68 @@ ct_snat_to_vip; +@@ -1807,6 +1823,68 @@ ct_snat_to_vip; ct_snat_to_vip(foo); Syntax error at `(' expecting `;'. @@ -15329,7 +21201,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Miscellaneous negative tests. ; Syntax error at `;'. -@@ -1837,53 +1910,46 @@ ovn_start +@@ -1837,53 +1915,46 @@ ovn_start # Turn on port security on all the vifs except vif[123]1. # Make vif13, vif2[23], vif3[123] destinations for unknown MACs. # Add some ACLs for Ethertypes 1234, 1235, 1236. @@ -15397,7 +21269,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Make sure there is no attempt to adding duplicated flows by ovn-controller AT_FAIL_IF([test -n "`grep duplicate hv1/ovn-controller.log`"]) AT_FAIL_IF([test -n "`grep duplicate hv2/ovn-controller.log`"]) -@@ -2078,11 +2144,7 @@ done +@@ -2078,11 +2149,7 @@ done # set address for lp13 with invalid characters. # lp13 should be configured with only 192.168.0.13. @@ -15410,7 +21282,7 @@ index 2e0bc9c53..bd59c0a77 100644 sip=`ip_to_hex 192 168 0 11` tip=`ip_to_hex 192 168 0 13` -@@ -2155,7 +2217,11 @@ for i in 1 2; do +@@ -2155,7 +2222,11 @@ for i in 1 2; do done done @@ -15423,7 +21295,7 @@ index 2e0bc9c53..bd59c0a77 100644 # dump port bindings; since we have vxlan and geneve tunnels, we expect the # ports to be bound to geneve tunnels. 
-@@ -2175,9 +2241,8 @@ check_row_count Port_Binding 1 logical_port=lp22 encap=$encap_rec +@@ -2175,9 +2246,8 @@ check_row_count Port_Binding 1 logical_port=lp22 encap=$encap_rec # for ARP resolution). OVN_POPULATE_ARP @@ -15435,7 +21307,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Make sure there is no attempt to adding duplicated flows by ovn-controller AT_FAIL_IF([test -n "`grep duplicate hv1/ovn-controller.log`"]) -@@ -2567,6 +2632,7 @@ for i in 1 2; do +@@ -2567,6 +2637,7 @@ for i in 1 2; do OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up $lsp_name` = xup]) done done @@ -15443,7 +21315,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-sbctl dump-flows > sbflows AT_CAPTURE_FILE([sbflows]) -@@ -2733,6 +2799,7 @@ for hv in 1 2; do +@@ -2733,6 +2804,7 @@ for hv in 1 2; do done @@ -15451,7 +21323,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-sbctl dump-flows > sbflows -@@ -2866,6 +2933,7 @@ for hv in 1 2; do +@@ -2866,6 +2938,7 @@ for hv in 1 2; do done @@ -15459,7 +21331,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-nbctl show ovn-sbctl dump-flows > sbflows -@@ -3003,6 +3071,7 @@ for i in 1 2; do +@@ -3003,6 +3076,7 @@ for i in 1 2; do OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up $lsp_name` = xup]) done @@ -15467,7 +21339,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-nbctl show ovn-sbctl dump-flows > sbflows -@@ -3213,6 +3282,7 @@ for tag in 10 20; do +@@ -3213,6 +3287,7 @@ for tag in 10 20; do OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up $lsp_name` = xup]) done done @@ -15475,7 +21347,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-sbctl dump-flows -@@ -3371,9 +3441,8 @@ ovs-vsctl add-port br-phys vif3 -- set Interface vif3 options:tx_pcap=hv3/vif3-t +@@ -3371,9 +3446,8 @@ ovs-vsctl add-port br-phys vif3 -- set Interface vif3 options:tx_pcap=hv3/vif3-t # for ARP resolution). 
OVN_POPULATE_ARP @@ -15487,7 +21359,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_packet INPORT DST SRC ETHTYPE OUTPORT... # -@@ -3537,9 +3606,8 @@ ovs-vsctl add-port br-phys vif3 -- set Interface vif3 options:tx_pcap=hv3/vif3-t +@@ -3537,9 +3611,8 @@ ovs-vsctl add-port br-phys vif3 -- set Interface vif3 options:tx_pcap=hv3/vif3-t # for ARP resolution). OVN_POPULATE_ARP @@ -15499,7 +21371,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_packet INPORT DST SRC ETHTYPE OUTPORT... # -@@ -3728,6 +3796,7 @@ for i in 1 2 3; do +@@ -3728,6 +3801,7 @@ for i in 1 2 3; do done done @@ -15507,7 +21379,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync # Pre-populate the hypervisors' ARP tables so that we don't lose any -@@ -3735,9 +3804,6 @@ check ovn-nbctl --wait=hv sync +@@ -3735,9 +3809,6 @@ check ovn-nbctl --wait=hv sync # for ARP resolution). OVN_POPULATE_ARP @@ -15517,7 +21389,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_ip INPORT SRC_MAC DST_MAC SRC_IP DST_IP OUTPORT... # # This shell function causes a packet to be received on INPORT. The packet's -@@ -4134,8 +4200,8 @@ done +@@ -4134,8 +4205,8 @@ done OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15528,7 +21400,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_ip INPORT SRC_MAC DST_MAC SRC_IP DST_IP OUTPORT... # -@@ -4307,8 +4373,8 @@ done +@@ -4307,8 +4378,8 @@ done OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15539,7 +21411,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Given the name of a logical port, prints the name of the hypervisor # on which it is located. -@@ -4740,8 +4806,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -4740,8 +4811,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15550,7 +21422,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Packet to send. 
packet="inport==\"ls1-lp1\" && eth.src==$ls1_lp1_mac && eth.dst==$rp_ls1_mac && -@@ -4851,9 +4917,8 @@ ovs-vsctl -- add-port br-int vif2 -- \ +@@ -4851,9 +4922,8 @@ ovs-vsctl -- add-port br-int vif2 -- \ ofport-request=1 @@ -15562,7 +21434,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between the two ports. -@@ -4885,11 +4950,7 @@ as hv1 ovs-ofctl dump-flows br-int +@@ -4885,11 +4955,7 @@ as hv1 ovs-ofctl dump-flows br-int #Disable router R1 @@ -15575,7 +21447,7 @@ index 2e0bc9c53..bd59c0a77 100644 echo "---------SB dump-----" ovn-sbctl list datapath_binding -@@ -4964,10 +5025,11 @@ ovs-vsctl -- add-port br-int vif2 -- \ +@@ -4964,10 +5030,11 @@ ovs-vsctl -- add-port br-int vif2 -- \ options:rxq_pcap=hv1/vif2-rx.pcap \ ofport-request=1 @@ -15590,7 +21462,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between the two ports. -@@ -4979,39 +5041,11 @@ dst_ip=`ip_to_hex 172 16 1 2` +@@ -4979,39 +5046,11 @@ dst_ip=`ip_to_hex 172 16 1 2` packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000 as hv1 ovs-appctl netdev-dummy/receive vif1 $packet @@ -15633,7 +21505,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-appctl netdev-dummy/receive vif1 $packet -@@ -5114,8 +5148,11 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -5114,8 +5153,11 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. 
@@ -15647,7 +21519,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between foo1 and alice1 src_mac="f00000010203" -@@ -5133,25 +5170,6 @@ dst_ip=`ip_to_hex 172 16 2 2` +@@ -5133,25 +5175,6 @@ dst_ip=`ip_to_hex 172 16 2 2` packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000 as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet @@ -15673,7 +21545,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Packet to Expect at bob1 src_mac="000000010205" dst_mac="f00000010205" -@@ -5333,8 +5351,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -5333,8 +5356,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15684,7 +21556,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between foo1 and alice1 src_mac="f00000010203" -@@ -5469,7 +5487,8 @@ as hv1 ovs-appctl vlog/set dbg +@@ -5469,7 +5492,8 @@ as hv1 ovs-appctl vlog/set dbg OVN_POPULATE_ARP @@ -15694,7 +21566,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-vsctl show -@@ -6189,7 +6208,8 @@ ovs-vsctl -- add-port br-int hv1-vif5 -- \ +@@ -6189,7 +6213,8 @@ ovs-vsctl -- add-port br-int hv1-vif5 -- \ OVN_POPULATE_ARP @@ -15704,7 +21576,7 @@ index 2e0bc9c53..bd59c0a77 100644 trim_zeros() { sed 's/\(00\)\{1,\}$//' -@@ -6469,10 +6489,8 @@ ovn-nbctl lsp-add foo foo1 \ +@@ -6469,10 +6494,8 @@ ovn-nbctl lsp-add foo foo1 \ ovn-nbctl lsp-add alice alice1 \ -- lsp-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.2" @@ -15717,7 +21589,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between foo1 and alice1 src_mac="f00000010203" -@@ -6535,7 +6553,8 @@ ip_prefix=192.168.1.0/24 nexthop=20.0.0.1 -- add Logical_Router \ +@@ -6535,7 +6558,8 @@ ip_prefix=192.168.1.0/24 nexthop=20.0.0.1 -- add Logical_Router \ R2 static_routes @lrt # Wait for ovn-controller to catch up. @@ -15727,7 +21599,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send the packet again. 
as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet -@@ -6605,11 +6624,11 @@ ovs-vsctl -- add-port br-int vif2 -- \ +@@ -6605,11 +6629,11 @@ ovs-vsctl -- add-port br-int vif2 -- \ options:rxq_pcap=hv1/vif2-rx.pcap \ ofport-request=1 @@ -15743,7 +21615,7 @@ index 2e0bc9c53..bd59c0a77 100644 for i in 1 2; do : > vif$i.expected -@@ -6764,10 +6783,6 @@ ovs-vsctl -- add-port br-int vif3 -- \ +@@ -6764,10 +6788,6 @@ ovs-vsctl -- add-port br-int vif3 -- \ options:rxq_pcap=pbr-hv/vif3-rx.pcap \ ofport-request=1 @@ -15754,7 +21626,7 @@ index 2e0bc9c53..bd59c0a77 100644 ls1_ro_mac=00:00:00:01:02:f1 ls1_ro_ip=192.168.1.1 -@@ -6952,10 +6967,6 @@ ovs-vsctl -- add-port br-int vif3 -- \ +@@ -6952,10 +6972,6 @@ ovs-vsctl -- add-port br-int vif3 -- \ options:rxq_pcap=pbr-hv/vif3-rx.pcap \ ofport-request=1 @@ -15765,7 +21637,7 @@ index 2e0bc9c53..bd59c0a77 100644 ls1_ro_mac=00:00:00:01:02:f1 ls1_ro_ip=2001::1 -@@ -7158,6 +7169,7 @@ ovn-nbctl lsp-del lp1 +@@ -7158,6 +7174,7 @@ ovn-nbctl lsp-del lp1 ovn-nbctl ls-del ls1 # wait for earlier changes to take effect @@ -15773,7 +21645,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=sb sync # ensure OF rules are no longer present. There used to be a bug here. -@@ -7204,14 +7216,15 @@ ovn-nbctl acl-add lsw0 to-lport 1002 'outport == "lp1" && ip6 && icmp6' allow-r +@@ -7204,14 +7221,15 @@ ovn-nbctl acl-add lsw0 to-lport 1002 'outport == "lp1" && ip6 && icmp6' allow-r ovn-nbctl acl-add lsw0 to-lport 1002 'outport == "lp2" && ip6 && icmp6' allow-related # Allow some time for ovn-northd and ovn-controller to catch up. 
@@ -15795,7 +21667,7 @@ index 2e0bc9c53..bd59c0a77 100644 for i in 1 2; do : > $i.expected done -@@ -7225,11 +7238,6 @@ na_packet=fa163e940598fa163ea1f9ae86dd6000000000203afffd81ce49a9480000f8163efffe +@@ -7225,11 +7243,6 @@ na_packet=fa163e940598fa163ea1f9ae86dd6000000000203afffd81ce49a9480000f8163efffe as hv1 ovs-appctl netdev-dummy/receive vif1 $ns_packet echo $na_packet >> 1.expected @@ -15807,7 +21679,7 @@ index 2e0bc9c53..bd59c0a77 100644 for i in 1 2; do OVN_CHECK_PACKETS([hv1/vif$i-tx.pcap], [$i.expected]) done -@@ -7250,9 +7258,7 @@ ovn_attach n1 br-phys 192.168.0.1 +@@ -7250,9 +7263,7 @@ ovn_attach n1 br-phys 192.168.0.1 row=`ovn-nbctl create Address_Set name=set1 addresses=\"1.1.1.1\"` ovn-nbctl set Address_Set $row name=set1 addresses=\"1.1.1.1,1.1.1.2\" @@ -15818,7 +21690,7 @@ index 2e0bc9c53..bd59c0a77 100644 # A bug previously existed in the address set support code # that caused ovn-controller to crash after an address set -@@ -7640,8 +7646,8 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ +@@ -7640,8 +7651,8 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ ofport-request=3 # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15829,7 +21701,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between foo1 and foo2 src_mac="0a0000a80103" -@@ -7848,32 +7854,11 @@ ovs-vsctl -- add-port br-int hv1-ls2lp2 -- \ +@@ -7848,32 +7859,11 @@ ovs-vsctl -- add-port br-int hv1-ls2lp2 -- \ ofport-request=2 # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15866,7 +21738,7 @@ index 2e0bc9c53..bd59c0a77 100644 src_mac="f00000000003" dst_mac="f00000000001" -@@ -8256,18 +8241,18 @@ as hv1 +@@ -8256,18 +8246,18 @@ as hv1 AT_CHECK([ovs-vsctl add-port br-int localvif1 -- set Interface localvif1 external_ids:iface-id=localvif1]) # On hv1, check that there are no flows outputting bcast to tunnel @@ -15889,7 +21761,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Verify that the local net patch port exists on hv2. 
OVS_WAIT_UNTIL([test `ovs-vsctl show | grep "Port patch-br-int-to-ln_port" | wc -l` -eq 1]) -@@ -8319,6 +8304,7 @@ ovn-nbctl --wait=sb lsp-add lsw0 lp2 +@@ -8319,6 +8309,7 @@ ovn-nbctl --wait=sb lsp-add lsw0 lp2 ovn-nbctl lsp-set-addresses lp1 $lp1_mac ovn-nbctl lsp-set-addresses lp2 $lp2_mac ovn-nbctl --wait=sb sync @@ -15897,7 +21769,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl acl-add lsw0 to-lport 1000 'tcp.dst==80' drop ovn-nbctl --log --severity=alert --name=drop-flow acl-add lsw0 to-lport 1000 'tcp.dst==81' drop -@@ -8425,6 +8411,7 @@ ovn-nbctl --wait=sb lsp-add lsw0 lp2 +@@ -8425,6 +8416,7 @@ ovn-nbctl --wait=sb lsp-add lsw0 lp2 ovn-nbctl lsp-set-addresses lp1 $lp1_mac ovn-nbctl lsp-set-addresses lp2 $lp2_mac ovn-nbctl --wait=sb sync @@ -15905,7 +21777,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Add an ACL that rate-limits logs at 10 per second. -@@ -8515,6 +8502,7 @@ ovn-nbctl --wait=sb lsp-add lsw0 lp2 +@@ -8515,6 +8507,7 @@ ovn-nbctl --wait=sb lsp-add lsw0 lp2 ovn-nbctl lsp-set-addresses lp1 $lp1_mac ovn-nbctl lsp-set-addresses lp2 $lp2_mac ovn-nbctl --wait=sb sync @@ -15913,7 +21785,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-appctl -t ovn-controller vlog/set file:dbg -@@ -8562,6 +8550,7 @@ check ovs-vsctl add-br br-phys +@@ -8562,6 +8555,7 @@ check ovs-vsctl add-br br-phys ovn_attach n1 br-phys 192.168.0.1 check ovs-vsctl add-port br-int vif1 -- set Interface vif1 external-ids:iface-id=lp1 options:tx_pcap=vif1-tx.pcap options:rxq_pcap=vif1-rx.pcap ofport-request=1 check ovs-vsctl add-port br-int vif2 -- set Interface vif2 external-ids:iface-id=lp2 options:tx_pcap=vif2-tx.pcap options:rxq_pcap=vif2-rx.pcap ofport-request=2 @@ -15921,7 +21793,7 @@ index 2e0bc9c53..bd59c0a77 100644 AT_CAPTURE_FILE([trace]) ovn_trace () { -@@ -8960,8 +8949,8 @@ ovs-vsctl -- add-port br-int vm2 -- \ +@@ -8960,8 +8954,8 @@ ovs-vsctl -- add-port br-int vm2 -- \ OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. 
@@ -15932,7 +21804,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Test that ovn-controllers create ct-zone entry for container ports. foo1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-foo1) -@@ -8986,8 +8975,10 @@ bar2_zoneid=$(as hv2 ovs-vsctl get bridge br-int external_ids:ct-zone-bar2) +@@ -8986,8 +8980,10 @@ bar2_zoneid=$(as hv2 ovs-vsctl get bridge br-int external_ids:ct-zone-bar2) AT_CHECK([test -z $bar2_zoneid]) # Add back bar2 @@ -15943,7 +21815,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync bar2_zoneid=$(as hv2 ovs-vsctl get bridge br-int external_ids:ct-zone-bar2) -@@ -9126,6 +9117,13 @@ OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up vm1)]) +@@ -9126,6 +9122,13 @@ OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up vm1)]) OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up foo1)]) OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up bar1)]) @@ -15957,7 +21829,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-vsctl del-port vm1 OVS_WAIT_UNTIL([test xdown = x$(ovn-nbctl lsp-get-up vm1)]) OVS_WAIT_UNTIL([test xdown = x$(ovn-nbctl lsp-get-up foo1)]) -@@ -9267,8 +9265,8 @@ ovn-nbctl --wait=hv lsp-add bob bob1 \ +@@ -9267,8 +9270,8 @@ ovn-nbctl --wait=hv lsp-add bob bob1 \ OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -15968,7 +21840,7 @@ index 2e0bc9c53..bd59c0a77 100644 trim_zeros() { sed 's/\(00\)\{1,\}$//' -@@ -9375,7 +9373,8 @@ ovs-vsctl -- add-port br-int hv1-vif2 -- \ +@@ -9375,7 +9378,8 @@ ovs-vsctl -- add-port br-int hv1-vif2 -- \ ofport-request=2 OVN_POPULATE_ARP @@ -15978,7 +21850,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-vsctl show echo "*************************" -@@ -9868,6 +9867,7 @@ check as gw1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys +@@ -9868,20 +9872,18 @@ check as gw1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys check as gw2 ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=phys:br-phys check as ext1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys @@ -15986,21 +21858,76 @@ index 2e0bc9c53..bd59c0a77 100644 AT_CHECK([ovn-nbctl --wait=sb sync], [0], [ignore]) ovn-sbctl dump-flows > sbflows -@@ -9935,13 +9935,9 @@ test_ip_packet() - fi - as ext1 reset_pcap_file ext1-vif1 ext1/vif1 + AT_CAPTURE_FILE([sbflows]) -- sleep 1 +-reset_pcap_file() { +- local iface=$1 +- local pcap_file=$2 +- check ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \ +-options:rxq_pcap=dummy-rx.pcap +- rm -f ${pcap_file}*.pcap +- check ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \ +-options:rxq_pcap=${pcap_file}-rx.pcap +-} ++hv1_gw1_ofport=$(as hv1 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw1-0) ++hv1_gw2_ofport=$(as hv1 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw2-0) ++ ++OVS_WAIT_UNTIL([ ++ test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=37 | grep -c "active_backup,ofport,members:$hv1_gw1_ofport,$hv1_gw2_ofport") ++]) + + test_ip_packet() + { +@@ -9927,21 +9929,17 @@ test_ip_packet() + echo $expected > ext1-vif1.expected + exp_gw_ip_garp=ffffffffffff00000201020308060001080006040001000002010203ac100101000000000000ac100101 + echo $exp_gw_ip_garp >> ext1-vif1.expected +- as $active_gw reset_pcap_file br-phys_n1 $active_gw/br-phys_n1 ++ as $active_gw reset_iface_pcap_file br-phys_n1 $active_gw/br-phys_n1 + + if test $backup_vswitchd_dead != 1; then + # Reset the file only if vswitchd in backup gw is alive +- as $backup_gw reset_pcap_file br-phys_n1 $backup_gw/br-phys_n1 ++ as $backup_gw reset_iface_pcap_file br-phys_n1 $backup_gw/br-phys_n1 + fi +- as ext1 reset_pcap_file ext1-vif1 ext1/vif1 - +- sleep 1 ++ as ext1 reset_iface_pcap_file ext1-vif1 ext1/vif1 + # Resend packet from foo1 to outside1 check as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet -- sleep 1 -- - AT_CAPTURE_FILE([exp]) - AT_CAPTURE_FILE([rcv]) - check_packets() 
{ -@@ -10131,6 +10127,7 @@ as gw1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys +- sleep 1 +- + AT_CAPTURE_FILE([exp]) + AT_CAPTURE_FILE([rcv]) + check_packets() { +@@ -9989,6 +9987,10 @@ AT_CHECK( + <1> + ]) + ++OVS_WAIT_UNTIL([ ++ test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=37 | grep -c "active_backup,ofport,members:$hv1_gw2_ofport,$hv1_gw1_ofport") ++]) ++ + test_ip_packet gw2 gw1 0 + + # Get the claim count of both gw1 and gw2. +@@ -10009,6 +10011,12 @@ OVS_WAIT_UNTIL([test $gw1_claim_ct = `cat gw1/ovn-controller.log \ + AT_CHECK([test $gw2_claim_ct = `cat gw2/ovn-controller.log | \ + grep -c "cr-alice: Claiming"`]) + ++OVS_WAIT_UNTIL([ ++ bfd_status=$(as hv1 ovs-vsctl get interface ovn-gw2-0 bfd_status:state) ++ echo "bfd status = $bfd_status" ++ test "$bfd_status" = "down" ++]) ++ + test_ip_packet gw1 gw2 1 + + as gw2 +@@ -10131,6 +10139,7 @@ as gw1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys as gw2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys as ext1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys @@ -16008,7 +21935,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=sb sync ovn-sbctl dump-flows > sbflows -@@ -10143,8 +10140,7 @@ hv1_ch_uuid=$(fetch_column Chassis _uuid name=hv1) +@@ -10143,8 +10152,7 @@ hv1_ch_uuid=$(fetch_column Chassis _uuid name=hv1) wait_column "$hv1_ch_uuid" HA_Chassis_Group ref_chassis # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16018,7 +21945,7 @@ index 2e0bc9c53..bd59c0a77 100644 reset_pcap_file() { local iface=$1 -@@ -10342,6 +10338,7 @@ check as hv3 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys +@@ -10342,6 +10350,7 @@ check as hv3 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys dnl Allow some time for ovn-northd and ovn-controller to catch up. 
@@ -16026,7 +21953,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync (echo "---------NB dump-----" -@@ -10386,12 +10383,12 @@ AT_CAPTURE_FILE([hv2flows]) +@@ -10386,12 +10395,12 @@ AT_CAPTURE_FILE([hv2flows]) AT_CHECK( [# Check that redirect mapping is programmed only on hv2 @@ -16043,7 +21970,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Check that arp reply on distributed gateway port is only programmed on hv2 grep arp hv1flows | grep load:0x2- | grep =0x2,metadata=0x1 | wc -l -@@ -10461,6 +10458,7 @@ OVS_WAIT_UNTIL([test 1 = `as hv2 ovs-vsctl show | \ +@@ -10461,6 +10470,7 @@ OVS_WAIT_UNTIL([test 1 = `as hv2 ovs-vsctl show | \ grep "Port patch-br-int-to-ln-alice" | wc -l`]) dnl Allow some time for ovn-controller to catch up. @@ -16051,7 +21978,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync # ARP for router IP address from outside1 -@@ -10534,8 +10532,8 @@ ovn-nbctl lsp-add foo foo2 \ +@@ -10534,8 +10544,8 @@ ovn-nbctl lsp-add foo foo2 \ -- lsp-set-addresses foo2 "f0:00:00:01:02:06 192.168.1.3" # Allow some time for ovn-northd and ovn-controller to catch up. 
@@ -16062,7 +21989,7 @@ index 2e0bc9c53..bd59c0a77 100644 : > hv1-vif2.expected -@@ -10624,10 +10622,6 @@ AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown]) +@@ -10624,10 +10634,6 @@ AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown]) AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet]) AT_CHECK([ovn-nbctl --wait=hv lsp-set-options ln_port network_name=physnet1]) @@ -16073,7 +22000,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Expect no packets when hv2 bridge-mapping is not present : > packets OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [packets]) -@@ -10847,50 +10841,54 @@ ovn-nbctl lsp-set-addresses ln-outside unknown +@@ -10847,50 +10853,54 @@ ovn-nbctl lsp-set-addresses ln-outside unknown ovn-nbctl lsp-set-type ln-outside localnet ovn-nbctl lsp-set-options ln-outside network_name=phys @@ -16109,16 +22036,6 @@ index 2e0bc9c53..bd59c0a77 100644 -echo "---------------------" -ovn-nbctl list logical_router_port -echo "---------------------" -- --echo "---------SB dump-----" --ovn-sbctl list datapath_binding --echo "---------------------" --ovn-sbctl list port_binding --echo "---------------------" --ovn-sbctl dump-flows --echo "---------------------" --ovn-sbctl list chassis --echo "---------------------" +(echo "---------NB dump-----" + ovn-nbctl show + echo "---------------------" @@ -16129,7 +22046,16 @@ index 2e0bc9c53..bd59c0a77 100644 + ovn-nbctl list logical_router_port + echo "---------------------") > nbdump +AT_CAPTURE_FILE([nbdump]) -+ + +-echo "---------SB dump-----" +-ovn-sbctl list datapath_binding +-echo "---------------------" +-ovn-sbctl list port_binding +-echo "---------------------" +-ovn-sbctl dump-flows +-echo "---------------------" +-ovn-sbctl list chassis +-echo "---------------------" +(echo "---------SB dump-----" + ovn-sbctl list datapath_binding + echo "---------------------" @@ -16159,7 +22085,7 @@ index 2e0bc9c53..bd59c0a77 100644 foo1_ip=$(ip_to_hex 192 168 1 2) gw_ip=$(ip_to_hex 172 16 1 6) -@@ -10940,8 +10938,8 @@ as hv3 
reset_pcap_file hv3-vif1 hv3/vif1 +@@ -10940,8 +10950,8 @@ as hv3 reset_pcap_file hv3-vif1 hv3/vif1 as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet sleep 2 @@ -16170,7 +22096,7 @@ index 2e0bc9c53..bd59c0a77 100644 | grep "NXM_NX_TUN_ID" | grep -v n_packets=0 | wc -l], [0], [[0 ]]) -@@ -11083,8 +11081,8 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ +@@ -11083,8 +11093,8 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ ofport-request=3 # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16181,7 +22107,7 @@ index 2e0bc9c53..bd59c0a77 100644 reset_pcap_file() { local iface=$1 -@@ -11337,8 +11335,11 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -11337,8 +11347,11 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16195,7 +22121,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send ip packets between foo1 and alice1 src_mac="f00000010203" -@@ -11402,6 +11403,7 @@ for i in 1 2; do +@@ -11402,6 +11415,7 @@ for i in 1 2; do OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lp${i}1` = xup]) done @@ -16203,7 +22129,108 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-sbctl dump-flows -@@ -11553,6 +11555,7 @@ ovn-nbctl lsp-set-type ln-outside localnet +@@ -11483,10 +11497,100 @@ for i in 1 2; do + done + done + ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int |awk '/table=65/{print substr($8, 16, length($8))}' |sort -n], [0], [dnl ++10 ++11 ++]) ++ ++# remove the localport from br-int and re-create it ++as hv1 ++check ovs-vsctl del-port vif01 ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int |awk '/table=65/{print substr($8, 16, length($8))}' |sort -n], [0], [dnl ++11 ++]) ++ ++as hv1 ++check ovs-vsctl add-port br-int vif01 \ ++ -- set Interface vif01 external-ids:iface-id=lp01 ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int |awk '/table=65/{print substr($8, 16, length($8))}' |sort -n], [0], [dnl ++2 ++11 ++]) ++ + OVN_CLEANUP([hv1],[hv2]) + + AT_CLEANUP + ++AT_SETUP([ovn -- 
localport suppress gARP]) ++ovn_start ++ ++send_garp() { ++ local inport=$1 eth_src=$2 eth_dst=$3 spa=$4 tpa=$5 ++ local request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa} ++ as hv1 ovs-appctl netdev-dummy/receive vif$inport $request ++} ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++check ovs-vsctl add-br br-phys ++ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys ++ ++check ovn-nbctl ls-add ls \ ++ -- lsp-add ls lp \ ++ -- lsp-set-type lp localport \ ++ -- lsp-set-addresses lp "00:00:00:00:00:01 10.0.0.1" \ ++ -- lsp-add ls ln \ ++ -- lsp-set-type ln localnet \ ++ -- lsp-set-addresses ln unknown \ ++ -- lsp-set-options ln network_name=phys \ ++ -- lsp-add ls lsp \ ++ -- lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2" ++ ++dnl First bind the localport. ++check ovs-vsctl add-port br-int vif1 \ ++ -- set Interface vif1 external-ids:iface-id=lp ++check ovn-nbctl --wait=hv sync ++ ++dnl Then bind the regular vif. ++check ovs-vsctl add-port br-int vif2 \ ++ -- set Interface vif2 external-ids:iface-id=lsp \ ++ options:tx_pcap=hv1/vif2-tx.pcap \ ++ options:rxq_pcap=hv1/vif2-rx.pcap ++ ++wait_for_ports_up lsp ++check ovn-nbctl --wait=hv sync ++ ++dnl Wait for at least two gARPs from lsp (10.0.0.2). ++lsp_garp=ffffffffffff000000000002080600010800060400010000000000020a0000020000000000000a000002 ++OVS_WAIT_UNTIL([ ++ garps=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/br-phys-tx.pcap | grep ${lsp_garp} -c` ++ test $garps -ge 2 ++]) ++ ++dnl At this point it's safe to assume that ovn-controller skipped sending gARP ++dnl for the localport. Check that there are no other packets than the gARPs ++dnl for the regular vif. 
++AT_CHECK([ ++ pkts=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/br-phys-tx.pcap | grep -v ${lsp_garp} -c` ++ test 0 -eq $pkts ++]) ++ ++spa=$(ip_to_hex 10 0 0 1) ++tpa=$(ip_to_hex 10 0 0 100) ++send_garp 1 000000000001 ffffffffffff $spa $tpa ++ ++dnl traffic from localport should not be sent to localnet ++AT_CHECK([tcpdump -r hv1/br-phys_n1-tx.pcap arp[[24:4]]=0x0a000064 | wc -l],[0],[dnl ++0 ++],[ignore]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ + AT_SETUP([ovn -- 1 LR with HA distributed router gateway port]) + ovn_start + +@@ -11553,6 +11657,7 @@ ovn-nbctl lsp-set-type ln-outside localnet ovn-nbctl lsp-set-options ln-outside network_name=phys # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16211,7 +22238,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync echo "---------NB dump-----" -@@ -11626,20 +11629,20 @@ echo $hv2_gw1_ofport +@@ -11626,20 +11731,20 @@ echo $hv2_gw1_ofport echo $hv2_gw2_ofport echo "--- hv1 ---" @@ -16236,7 +22263,7 @@ index 2e0bc9c53..bd59c0a77 100644 grep active_backup | grep slaves:$hv2_gw1_ofport,$hv2_gw2_ofport \ | wc -l], [0], [1 ]) -@@ -11676,15 +11679,16 @@ ovn-nbctl --id=@gc0 create Gateway_Chassis \ +@@ -11676,15 +11781,16 @@ ovn-nbctl --id=@gc0 create Gateway_Chassis \ set Logical_Router_Port outside 'gateway_chassis=[@gc0,@gc1]' @@ -16255,7 +22282,7 @@ index 2e0bc9c53..bd59c0a77 100644 grep active_backup | grep slaves:$hv2_gw2_ofport,$hv2_gw1_ofport \ | wc -l], [0], [1 ]) -@@ -11837,12 +11841,12 @@ ovn-nbctl set Logical_Router_Port outside ha_chassis_group=$hagrp1_uuid +@@ -11837,12 +11943,12 @@ ovn-nbctl set Logical_Router_Port outside ha_chassis_group=$hagrp1_uuid wait_row_count HA_Chassis_Group 1 wait_row_count HA_Chassis 2 @@ -16270,7 +22297,7 @@ index 2e0bc9c53..bd59c0a77 100644 grep active_backup | grep slaves:$hv2_gw1_ofport,$hv2_gw2_ofport \ | wc -l], [0], [1 ]) -@@ -11894,12 +11898,12 @@ wait_column "$exp_ref_ch_list" HA_Chassis_Group ref_chassis +@@ -11894,12 +12000,12 @@ 
wait_column "$exp_ref_ch_list" HA_Chassis_Group ref_chassis # Increase the priority of gw2 ovn-nbctl --wait=sb ha-chassis-group-add-chassis hagrp1 gw2 40 @@ -16285,7 +22312,7 @@ index 2e0bc9c53..bd59c0a77 100644 grep active_backup | grep slaves:$hv2_gw2_ofport,$hv2_gw1_ofport \ | wc -l], [0], [1 ]) -@@ -12041,6 +12045,7 @@ AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet]) +@@ -12041,6 +12147,7 @@ AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet]) AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1]) # wait for earlier changes to take effect @@ -16293,7 +22320,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync reset_pcap_file() { -@@ -12241,6 +12246,7 @@ ovn-nbctl lsp-set-type ln-outside localnet +@@ -12241,6 +12348,7 @@ ovn-nbctl lsp-set-type ln-outside localnet ovn-nbctl lsp-set-options ln-outside network_name=phys # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16301,7 +22328,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync # currently when ovn-controller is restarted, the old entry is deleted -@@ -12878,6 +12884,45 @@ test_tcp_syn_packet() { +@@ -12878,6 +12986,45 @@ test_tcp_syn_packet() { check as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet } @@ -16347,7 +22374,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Create hypervisors hv[123]. # Add vif1[123] to hv1, vif2[123] to hv2, vif3[123] to hv3. # Add all of the vifs to a single logical switch sw0. 
-@@ -12904,8 +12949,6 @@ for i in 1 2 3; do +@@ -12904,8 +13051,6 @@ for i in 1 2 3; do done OVN_POPULATE_ARP @@ -16356,7 +22383,7 @@ index 2e0bc9c53..bd59c0a77 100644 for i in 1 2 3; do : > vif${i}1.expected -@@ -12916,6 +12959,7 @@ check ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p11\"" reject +@@ -12916,6 +13061,7 @@ check ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p11\"" reject check ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p21\"" reject # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16364,7 +22391,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync ovn-sbctl dump-flows > sbflows -@@ -12931,6 +12975,10 @@ test_tcp_syn_packet 11 1 000000000011 000000000021 $(ip_to_hex 192 168 1 11) $(i +@@ -12931,6 +13077,10 @@ test_tcp_syn_packet 11 1 000000000011 000000000021 $(ip_to_hex 192 168 1 11) $(i test_tcp_syn_packet 21 2 000000000021 000000000011 $(ip_to_hex 192 168 1 21) $(ip_to_hex 192 168 1 11) 0000 8b40 3039 0000 b85f 70e4 test_tcp_syn_packet 31 3 000000000031 000000000012 $(ip_to_hex 192 168 1 31) $(ip_to_hex 192 168 1 12) 0000 8b40 3039 0000 b854 70d9 @@ -16375,7 +22402,7 @@ index 2e0bc9c53..bd59c0a77 100644 for i in 1 2 3; do OVN_CHECK_PACKETS([hv$i/vif${i}1-tx.pcap], [vif${i}1.expected]) done -@@ -13062,8 +13110,8 @@ done +@@ -13062,8 +13212,8 @@ done OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16386,7 +22413,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_ip INPORT SRC_MAC DST_MAC SRC_IP DST_IP OUTPORT... # -@@ -13142,10 +13190,6 @@ for is in 1 2 3; do +@@ -13142,10 +13292,6 @@ for is in 1 2 3; do done done @@ -16397,7 +22424,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Now check the packets actually received against the ones expected. for i in 1 2 3; do for j in 1 2 3; do -@@ -13284,8 +13328,8 @@ done +@@ -13284,8 +13430,8 @@ done OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. 
@@ -16408,7 +22435,7 @@ index 2e0bc9c53..bd59c0a77 100644 lsp_to_mac() { echo f0:00:00:00:0${1:0:1}:${1:1:2} -@@ -13391,10 +13435,6 @@ for is in 1 2 3; do +@@ -13391,10 +13537,6 @@ for is in 1 2 3; do done done @@ -16419,7 +22446,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Now check the packets actually received against the ones expected. for i in 1 2 3; do for j in 1 2 3; do -@@ -13704,6 +13744,7 @@ grep conjunction.*conjunction.*conjunction | wc -l`]) +@@ -13704,6 +13846,7 @@ grep conjunction.*conjunction.*conjunction | wc -l`]) ovn-nbctl acl-del ls1 to-lport 1001 \ 'ip4 && ip4.src == $set1 && ip4.dst == $set1' @@ -16427,7 +22454,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync # priority=2001,ip,metadata=0x1,nw_dst=10.0.0.10 actions=conjunction(10,1/2) # priority=2001,ip,metadata=0x1,nw_dst=10.0.0.8 actions=conjunction(11,1/2) -@@ -13725,27 +13766,30 @@ AT_CLEANUP +@@ -13725,27 +13868,30 @@ AT_CLEANUP AT_SETUP([ovn -- Superseding ACLs with conjunction]) ovn_start @@ -16466,7 +22493,7 @@ index 2e0bc9c53..bd59c0a77 100644 set interface hv1-vif2 external-ids:iface-id=ls1-lp2 \ options:tx_pcap=hv1/vif2-tx.pcap \ options:rxq_pcap=hv1/vif2-rx.pcap \ -@@ -13765,7 +13809,8 @@ test_ip() { +@@ -13765,7 +13911,8 @@ test_ip() { local packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}\ ${dst_ip}0035111100080000 shift; shift; shift; shift; shift @@ -16476,7 +22503,7 @@ index 2e0bc9c53..bd59c0a77 100644 for outport; do echo $packet >> $outport.expected done -@@ -13774,19 +13819,51 @@ ${dst_ip}0035111100080000 +@@ -13774,19 +13921,51 @@ ${dst_ip}0035111100080000 reset_pcap_file() { local iface=$1 local pcap_file=$2 @@ -16535,7 +22562,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Traffic 10.0.0.1, 10.0.0.2 -> 10.0.0.3, 10.0.0.4 should be allowed. for src in `seq 1 2`; do -@@ -13814,9 +13891,9 @@ rm -f 2.packets +@@ -13814,21 +13993,21 @@ rm -f 2.packets > 2.expected # Add two less restrictive allow ACLs for src IP 10.0.0.1. 
@@ -16547,8 +22574,28 @@ index 2e0bc9c53..bd59c0a77 100644 +check ovn-nbctl --wait=hv sync # Check OVS flows, the less restrictive flows should have been installed. - AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ -@@ -13858,11 +13935,9 @@ reset_pcap_file hv1-vif2 hv1/vif2 +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | \ + grep "priority=1003" | \ + sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl +- table=45, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ++ table=44, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() + ]) + + # Traffic 10.0.0.1, 10.0.0.2 -> 10.0.0.3, 10.0.0.4 should be allowed. 
+@@ -13858,40 +14037,38 @@ reset_pcap_file hv1-vif2 hv1/vif2 rm -f 2.packets > 2.expected @@ -16561,8 +22608,24 @@ index 2e0bc9c53..bd59c0a77 100644 +check ovn-nbctl --wait=hv sync # Check OVS flows, the second less restrictive allow ACL should have been installed. - AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ -@@ -13878,8 +13953,8 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | \ + grep "priority=1003" | \ + sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl +- table=45, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ++ table=44, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ]) # Remove the less restrictive allow ACL. 
@@ -16572,8 +22635,28 @@ index 2e0bc9c53..bd59c0a77 100644 +check ovn-nbctl --wait=hv sync # Check OVS flows, the 10.0.0.1 conjunction should have been reinstalled. - AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ -@@ -13917,8 +13992,8 @@ $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | \ + grep "priority=1003" | \ + sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl +- table=45, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ++ table=44, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() + ]) + + # Traffic 10.0.0.1, 10.0.0.2 -> 10.0.0.3, 10.0.0.4 should be allowed. 
+@@ -13917,20 +14094,43 @@ $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets AT_CHECK([cat 2.packets], [0], [expout]) # Re-add the less restrictive allow ACL for src IP 10.0.0.1 @@ -16583,11 +22666,19 @@ index 2e0bc9c53..bd59c0a77 100644 +check ovn-nbctl --wait=hv sync # Check OVS flows, the less restrictive flows should have been installed. - AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ -@@ -13933,6 +14008,29 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ - table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() - ]) - +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | \ ++ grep "priority=1003" | \ ++ sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl ++ table=44, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ++]) ++ +# Add another ACL that overlaps with the existing less restrictive ones. +check ovn-nbctl acl-add ls1 to-lport 3 'udp || ((ip4.src==10.0.0.1 || ip4.src==10.0.0.2) && (ip4.dst == 10.0.0.3 || ip4.dst == 10.0.0.4))' allow +check ovn-nbctl --wait=hv sync @@ -16596,25 +22687,30 @@ index 2e0bc9c53..bd59c0a77 100644 +# with an additional conjunction action. +# +# New non-conjunctive flows should be added to match on 'udp'. 
-+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ -+ grep "priority=1003" | \ -+ sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl -+ table=45, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,46) -+ table=45, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,46) -+ table=45, priority=1003,conj_id=4,ip,metadata=0x1 actions=resubmit(,46) -+ table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction(),conjunction() -+ table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction(),conjunction() -+ table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,46) -+ table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction(),conjunction() -+ table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() -+ table=45, priority=1003,udp,metadata=0x1 actions=resubmit(,46) -+ table=45, priority=1003,udp6,metadata=0x1 actions=resubmit(,46) -+]) -+ - OVN_CLEANUP([hv1]) - AT_CLEANUP ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | \ + grep "priority=1003" | \ + sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl +- table=45, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,46) +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction() +- table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ++ table=44, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, 
priority=1003,conj_id=4,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(),conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(),conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=resubmit(,45) ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction(),conjunction() ++ table=44, priority=1003,ip,metadata=0x1,nw_src=10.0.0.42 actions=conjunction() ++ table=44, priority=1003,udp,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=1003,udp6,metadata=0x1 actions=resubmit(,45) + ]) -@@ -13983,8 +14081,8 @@ ovn-nbctl create Address_Set name=set1 addresses=\"f0:00:00:00:00:11\",\"f0:00:0 + OVN_CLEANUP([hv1]) +@@ -13983,8 +14183,8 @@ ovn-nbctl create Address_Set name=set1 addresses=\"f0:00:00:00:00:11\",\"f0:00:0 OVN_POPULATE_ARP # Allow some time for ovn-northd and ovn-controller to catch up. @@ -16625,7 +22721,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Make sure there is no attempt to adding duplicated flows by ovn-controller AT_FAIL_IF([test -n "`grep duplicate hv1/ovn-controller.log`"]) -@@ -14224,6 +14322,7 @@ done +@@ -14224,6 +14424,7 @@ done OVN_POPULATE_ARP # allow some time for ovn-northd and ovn-controller to catch up. @@ -16633,7 +22729,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96 -@@ -14294,6 +14393,45 @@ test_tcp_syn_packet() { +@@ -14294,6 +14495,45 @@ test_tcp_syn_packet() { as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet } @@ -16679,7 +22775,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_tcp6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_ROUTER TCP_SPORT TCP_DPORT TCP_CHKSUM EXP_TCP_RST_CHKSUM # # Causes a packet to be received on INPORT of the hypervisor HV. 
The packet is a TCP syn segment with -@@ -14314,6 +14452,36 @@ test_tcp6_packet() { +@@ -14314,6 +14554,36 @@ test_tcp6_packet() { as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet } @@ -16716,7 +22812,7 @@ index 2e0bc9c53..bd59c0a77 100644 # test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_PROTO IPV6_LEN DATA EXP_ICMP_CODE EXP_ICMP_CHKSUM # # Causes a packet to be received on INPORT of the hypervisor HV. The packet is an IPv6 -@@ -14365,16 +14533,17 @@ done +@@ -14365,16 +14635,17 @@ done OVN_POPULATE_ARP # allow some time for ovn-northd and ovn-controller to catch up. @@ -16736,7 +22832,7 @@ index 2e0bc9c53..bd59c0a77 100644 OVN_CHECK_PACKETS([hv2/vif2-tx.pcap], [vif2.expected]) OVN_CLEANUP([hv1], [hv2]) -@@ -14439,7 +14608,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -14439,7 +14710,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ OVN_POPULATE_ARP @@ -16746,7 +22842,7 @@ index 2e0bc9c53..bd59c0a77 100644 packet="inport==\"sw1-p1\" && eth.src==$sw1_p1_mac && eth.dst==$sw1_ro_mac && ip4 && ip.ttl==64 && ip4.src==$sw1_p1_ip && ip4.dst==$sw2_p1_ip && -@@ -14632,6 +14802,8 @@ OVS_WAIT_UNTIL( +@@ -14632,6 +14904,8 @@ OVS_WAIT_UNTIL( logical_port=ls1-lp_ext1` test "$chassis" = "$hv1_uuid"]) @@ -16755,7 +22851,7 @@ index 2e0bc9c53..bd59c0a77 100644 # There should be DHCPv4/v6 OF flows for the ls1-lp_ext1 port in hv1 (ovn-sbctl dump-flows lr0; ovn-sbctl dump-flows ls1) > sbflows as hv1 ovs-ofctl dump-flows br-int > brintflows -@@ -14912,6 +15084,7 @@ OVS_WAIT_UNTIL( +@@ -14912,6 +15186,7 @@ OVS_WAIT_UNTIL( [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ logical_port=ls1-lp_ext1` test "$chassis" = "$hv2_uuid"]) @@ -16763,7 +22859,7 @@ index 2e0bc9c53..bd59c0a77 100644 # There should be OF flows for DHCP4/v6 for the ls1-lp_ext1 port in hv2 AT_CHECK([as hv2 ovs-ofctl dump-flows br-int | \ -@@ -15026,6 +15199,7 @@ OVS_WAIT_UNTIL( +@@ -15026,6 +15301,7 @@ OVS_WAIT_UNTIL( [chassis=`ovn-sbctl --bare --columns chassis find 
port_binding \ logical_port=ls1-lp_ext1` test "$chassis" = "$hv1_uuid"]) @@ -16771,7 +22867,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-vsctl show -@@ -15106,6 +15280,7 @@ OVS_WAIT_UNTIL( +@@ -15106,6 +15382,7 @@ OVS_WAIT_UNTIL( [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ logical_port=ls1-lp_ext1` test "$chassis" = "$hv3_uuid"]) @@ -16779,7 +22875,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-vsctl show -@@ -15190,11 +15365,12 @@ OVS_WAIT_UNTIL( +@@ -15190,11 +15467,12 @@ OVS_WAIT_UNTIL( [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ logical_port=ls1-lp_ext1` test "$chassis" = "$hv1_uuid"]) @@ -16789,11 +22885,11 @@ index 2e0bc9c53..bd59c0a77 100644 # to router mac. AT_CHECK([as hv2 ovs-ofctl dump-flows br-int \ -table=28,dl_src=f0:00:00:00:00:03,dl_dst=a0:10:00:00:00:01 | \ -+table=30,dl_src=f0:00:00:00:00:03,dl_dst=a0:10:00:00:00:01 | \ ++table=29,dl_src=f0:00:00:00:00:03,dl_dst=a0:10:00:00:00:01 | \ grep -c "actions=drop"], [0], [1 ]) -@@ -15207,6 +15383,7 @@ OVS_WAIT_UNTIL( +@@ -15207,6 +15485,7 @@ OVS_WAIT_UNTIL( [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ logical_port=ls1-lp_ext1` test "$chassis" = "$hv2_uuid"]) @@ -16801,7 +22897,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) -@@ -15357,7 +15534,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -15357,7 +15636,8 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ OVN_POPULATE_ARP @@ -16811,7 +22907,7 @@ index 2e0bc9c53..bd59c0a77 100644 packet="inport==\"sw1-p1\" && eth.src==$sw1_p1_mac && eth.dst==$sw1_ro_mac && ip4 && ip.ttl==64 && ip4.src==$sw1_p1_ip && ip4.dst==$sw2_p1_ip && -@@ -15640,6 +15818,7 @@ test_ip6_packet_larger() { +@@ -15640,6 +15920,7 @@ test_ip6_packet_larger() { fi } @@ -16819,7 +22915,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync ovn-nbctl show > nbdump -@@ -15789,6 +15968,7 @@ ovn-nbctl lsp-add sw1 rp-sw1 -- set Logical_Switch_Port rp-sw1 \ +@@ -15789,6 +16070,7 @@ 
ovn-nbctl lsp-add sw1 rp-sw1 -- set Logical_Switch_Port rp-sw1 \ ovn-nbctl lsp-add sw0 sw0-p0 \ -- lsp-set-addresses sw0-p0 "f0:00:00:01:02:03 192.168.1.2 2001::2" @@ -16827,7 +22923,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl lsp-add sw0 sw0-p1 \ -- lsp-set-addresses sw0-p1 "f0:00:00:11:02:03 192.168.1.3 2001::3" -@@ -15799,6 +15979,7 @@ ovn-nbctl lr-nat-add lr0 snat 172.16.1.1 192.168.1.0/24 +@@ -15799,6 +16081,7 @@ ovn-nbctl lr-nat-add lr0 snat 172.16.1.1 192.168.1.0/24 ovn-nbctl lr-nat-add lr0 snat 2002::1 2001::/64 OVN_POPULATE_ARP @@ -16835,7 +22931,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync ovn-sbctl dump-flows > sbflows -@@ -15847,6 +16028,14 @@ ovn-nbctl --wait=hv sync +@@ -15847,6 +16130,14 @@ ovn-nbctl --wait=hv sync ovn-sbctl dump-flows > sbflows2 AT_CAPTURE_FILE([sbflows2]) @@ -16850,7 +22946,7 @@ index 2e0bc9c53..bd59c0a77 100644 dst_ip=$(ip_to_hex 172 16 2 10) fip_ip=$(ip_to_hex 172 16 1 2) src_ip=$(ip_to_hex 192 168 1 3) -@@ -15857,6 +16046,8 @@ echo $(get_arp_req f00000010204 $fip_ip $gw_router_ip) >> expected +@@ -15857,6 +16148,8 @@ echo $(get_arp_req f00000010204 $fip_ip $gw_router_ip) >> expected send_arp_reply 2 1 $gw_router_mac f00000010204 $gw_router_ip $fip_ip echo "${gw_router_mac}f0000001020408004500001c00004000fe0121b4${fip_ip}${dst_ip}${data}" >> expected @@ -16859,7 +22955,7 @@ index 2e0bc9c53..bd59c0a77 100644 OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) OVN_CLEANUP([hv1],[hv2]) -@@ -16045,6 +16236,7 @@ for i in 1 2 3 4 5; do +@@ -16045,6 +16338,7 @@ for i in 1 2 3 4 5; do done dnl Wait for the changes to be propagated @@ -16867,7 +22963,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync dnl Assert that each Chassis has a tunnel formed to every other Chassis -@@ -16324,6 +16516,7 @@ ovn-nbctl lrp-add router router-to-ls2 00:00:01:01:02:05 192.168.2.3/24 +@@ -16324,6 +16618,7 @@ ovn-nbctl lrp-add router router-to-ls2 00:00:01:01:02:05 192.168.2.3/24 ovn-nbctl lsp-add ls1 ls1-to-router 
-- set Logical_Switch_Port ls1-to-router type=router options:router-port=router-to-ls1 -- lsp-set-addresses ls1-to-router router ovn-nbctl lsp-add ls2 ls2-to-router -- set Logical_Switch_Port ls2-to-router type=router options:router-port=router-to-ls2 -- lsp-set-addresses ls2-to-router router @@ -16875,15 +22971,147 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync #ovn-sbctl dump-flows -@@ -16500,6 +16693,7 @@ ovn-nbctl lsp-set-type sw0-vir virtual - ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 - ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 +@@ -16449,57 +16744,69 @@ ovs-vsctl -- add-port br-int hv2-vif2 -- \ + + ovn-nbctl ls-add sw0 + +-ovn-nbctl lsp-add sw0 sw0-vir +-ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" +-ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" +-ovn-nbctl lsp-set-type sw0-vir virtual +-ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 +-ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 ++check ovn-nbctl lsp-add sw0 sw0-vir ++check ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" ++check ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" ++check ovn-nbctl lsp-set-type sw0-vir virtual ++check ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 ++check ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 + +-ovn-nbctl lsp-add sw0 sw0-p1 +-ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" +-ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3 10.0.0.10" ++check ovn-nbctl lsp-add sw0 sw0-p1 ++check ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" ++check ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3 10.0.0.10" + +-ovn-nbctl lsp-add sw0 sw0-p2 +-ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" +-ovn-nbctl 
lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4 10.0.0.10" ++check ovn-nbctl lsp-add sw0 sw0-p2 ++check ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" ++check ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4 10.0.0.10" + +-ovn-nbctl lsp-add sw0 sw0-p3 +-ovn-nbctl lsp-set-addresses sw0-p3 "50:54:00:00:00:05 10.0.0.5" +-ovn-nbctl lsp-set-port-security sw0-p3 "50:54:00:00:00:05 10.0.0.5 10.0.0.10" ++check ovn-nbctl lsp-add sw0 sw0-p3 ++check ovn-nbctl lsp-set-addresses sw0-p3 "50:54:00:00:00:05 10.0.0.5" ++check ovn-nbctl lsp-set-port-security sw0-p3 "50:54:00:00:00:05 10.0.0.5 10.0.0.10" + + # Create the second logical switch with one port +-ovn-nbctl ls-add sw1 +-ovn-nbctl lsp-add sw1 sw1-p1 +-ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" +-ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" ++check ovn-nbctl ls-add sw1 ++check ovn-nbctl lsp-add sw1 sw1-p1 ++check ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" ++check ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" + + # Create a logical router and attach both logical switches +-ovn-nbctl lr-add lr0 +-ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 +-ovn-nbctl lsp-add sw0 sw0-lr0 +-ovn-nbctl lsp-set-type sw0-lr0 router +-ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 +-ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 ++check ovn-nbctl lr-add lr0 ++check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 ++check ovn-nbctl lsp-add sw0 sw0-lr0 ++check ovn-nbctl lsp-set-type sw0-lr0 router ++check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 ++check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 + +-ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 +-ovn-nbctl lsp-add sw1 sw1-lr0 +-ovn-nbctl lsp-set-type sw1-lr0 router +-ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 +-ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 ++check ovn-nbctl 
lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 ++check ovn-nbctl lsp-add sw1 sw1-lr0 ++check ovn-nbctl lsp-set-type sw1-lr0 router ++check ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 ++check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 + +-OVN_POPULATE_ARP ++# Add an ACL that matches on sw0-vir being bound locally. ++check ovn-nbctl acl-add sw0 to-lport 1000 'is_chassis_resident("sw0-vir") && ip' allow + +-# Delete sw0-vir and add again. +-ovn-nbctl lsp-del sw0-vir ++check ovn-nbctl ls-add public ++check ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 ++check ovn-nbctl lsp-add public public-lr0 ++check ovn-nbctl lsp-set-type public-lr0 router ++check ovn-nbctl lsp-set-addresses public-lr0 router ++check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public + +-ovn-nbctl lsp-add sw0 sw0-vir +-ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" +-ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" +-ovn-nbctl lsp-set-type sw0-vir virtual +-ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 +-ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 ++# localnet port ++check ovn-nbctl lsp-add public ln-public ++check ovn-nbctl lsp-set-type ln-public localnet ++check ovn-nbctl lsp-set-addresses ln-public unknown ++check ovn-nbctl lsp-set-options ln-public network_name=public ++# schedule the gw router port to a chassis. 
Change the name of the chassis ++check ovn-nbctl --wait=hv lrp-set-gateway-chassis lr0-public hv1 20 ++ ++check ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.168.0.50 10.0.0.10 sw0-vir 10:54:00:00:00:10 ++ ++OVN_POPULATE_ARP ++ +wait_for_ports_up ovn-nbctl --wait=hv sync # Check that logical flows are added for sw0-vir in lsp_in_arp_rsp pipeline -@@ -16555,12 +16749,10 @@ spa=$(ip_to_hex 10 0 0 10) +@@ -16547,6 +16854,30 @@ ovs-vsctl del-port hv1-vif3 + AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=sw0-vir) = x], [0], []) + ++check_virtual_offlows_present() { ++ hv=$1 ++ ++ AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | grep "priority=2000"], [0], [dnl ++ table=44, priority=2000,ip,metadata=0x1 actions=resubmit(,45) ++ table=44, priority=2000,ipv6,metadata=0x1 actions=resubmit(,45) ++]) ++ ++ AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=11 | ofctl_strip_all | \ ++ grep "priority=92" | grep 172.168.0.50], [0], [dnl ++ table=11, priority=92,arp,reg14=0x3,metadata=0x3,arp_tpa=172.168.0.50,arp_op=1 actions=move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],mod_dl_src:10:54:00:00:00:10,load:0x2->NXM_OF_ARP_OP[[]],move:NXM_NX_ARP_SHA[[]]->NXM_NX_ARP_THA[[]],load:0x105400000010->NXM_NX_ARP_SHA[[]],push:NXM_OF_ARP_SPA[[]],push:NXM_OF_ARP_TPA[[]],pop:NXM_OF_ARP_SPA[[]],pop:NXM_OF_ARP_TPA[[]],move:NXM_NX_REG14[[]]->NXM_NX_REG15[[]],load:0x1->NXM_NX_REG10[[0]],resubmit(,37) ++]) ++} ++ ++check_virtual_offlows_not_present() { ++ hv=$1 ++ AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | grep "priority=2000"], [1], [dnl ++]) ++ ++ AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=11 | ofctl_strip_all | \ ++ grep "priority=92" | grep 172.168.0.50], [1], [dnl ++]) ++} ++ + # From sw0-p0 send GARP for 10.0.0.10. hv1 should claim sw0-vir + # and sw0-p1 should be its virtual_parent. 
+ eth_src=505400000003 +@@ -16555,12 +16886,10 @@ spa=$(ip_to_hex 10 0 0 10) tpa=$(ip_to_hex 10 0 0 10) send_garp 1 1 $eth_src $eth_dst $spa $tpa @@ -16900,41 +23128,153 @@ index 2e0bc9c53..bd59c0a77 100644 # There should be an arp resolve flow to resolve the virtual_ip with the # sw0-p1's MAC. -@@ -16578,6 +16770,8 @@ ovn-sbctl clear port_binding $pb_uuid virtual_parent +@@ -16570,6 +16899,13 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows2 | grep "reg0 == 10.0.0.10" | sed 's/ + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) + ]) + ++# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ + # Forcibly clear virtual_parent. ovn-controller should release the binding + # gracefully. + pb_uuid=$(ovn-sbctl --bare --columns _uuid find port_binding logical_port=sw0-vir) +@@ -16578,6 +16914,15 @@ ovn-sbctl clear port_binding $pb_uuid virtual_parent OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns chassis find port_binding \ logical_port=sw0-vir) = x]) +wait_row_count nb:Logical_Switch_Port 1 up=false name=sw0-vir + ++check ovn-nbctl --wait=hv sync ++# hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir. ++check_virtual_offlows_not_present hv1 ++ ++# hv2 should not have the flow for ACL. ++check_virtual_offlows_not_present hv2 ++ # From sw0-p0 resend GARP for 10.0.0.10. hv1 should reclaim sw0-vir # and sw0-p1 should be its virtual_parent. 
send_garp 1 1 $eth_src $eth_dst $spa $tpa -@@ -16588,6 +16782,8 @@ logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) +@@ -16588,6 +16933,60 @@ logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = xsw0-p1]) +wait_for_ports_up sw0-vir + ++check ovn-nbctl --wait=hv sync ++# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ ++# Release sw0-p1. ++as hv1 ovs-vsctl set interface hv1-vif1 external-ids:iface-id=sw0-px ++wait_column "false" nb:Logical_Switch_Port up name=sw0-p1 ++wait_column "false" nb:Logical_Switch_Port up name=sw0-vir ++ ++check ovn-nbctl --wait=hv sync ++# hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_not_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ ++# Claim sw0-p1 again. ++as hv1 ovs-vsctl set interface hv1-vif1 external-ids:iface-id=sw0-p1 ++wait_for_ports_up sw0-p1 ++ ++# hv1 should not have the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_not_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ ++# From sw0-p0 send GARP for 10.0.0.10. hv1 should claim sw0-vir ++# and sw0-p1 should be its virtual_parent. 
++eth_src=505400000003 ++eth_dst=ffffffffffff ++spa=$(ip_to_hex 10 0 0 10) ++tpa=$(ip_to_hex 10 0 0 10) ++send_garp 1 1 $eth_src $eth_dst $spa $tpa ++ ++wait_row_count Port_Binding 1 logical_port=sw0-vir chassis=$hv1_ch_uuid ++check_row_count Port_Binding 1 logical_port=sw0-vir virtual_parent=sw0-p1 ++wait_for_ports_up sw0-vir ++check ovn-nbctl --wait=hv sync ++ ++# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ # From sw0-p3 send GARP for 10.0.0.10. hv1 should claim sw0-vir # and sw0-p3 should be its virtual_parent. eth_src=505400000005 -@@ -16602,6 +16798,7 @@ logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) +@@ -16602,10 +17001,11 @@ logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = xsw0-p3]) +wait_for_ports_up sw0-vir # There should be an arp resolve flow to resolve the virtual_ip with the - # sw0-p2's MAC. -@@ -16627,6 +16824,7 @@ logical_port=sw0-vir) = x$hv2_ch_uuid], [0], []) +-# sw0-p2's MAC. +-sleep 1 ++# sw0-p3's MAC. ++check ovn-nbctl --wait=hv sync + ovn-sbctl dump-flows lr0 > lr0-flows3 + AT_CAPTURE_FILE([lr0-flows3]) + cp ovn-sb/ovn-sb.db lr0-flows3.db +@@ -16613,6 +17013,13 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows3 | grep "reg0 == 10.0.0.10" | sed 's + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:05; next;) + ]) + ++# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ + # send the garp from sw0-p2 (in hv2). 
hv2 should claim sw0-vir + # and sw0-p2 shpuld be its virtual_parent. + eth_src=505400000004 +@@ -16627,16 +17034,24 @@ logical_port=sw0-vir) = x$hv2_ch_uuid], [0], []) AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = xsw0-p2]) +wait_for_ports_up sw0-vir # There should be an arp resolve flow to resolve the virtual_ip with the - # sw0-p3's MAC. -@@ -16652,6 +16850,8 @@ sleep 1 +-# sw0-p3's MAC. +-sleep 1 ++# sw0-p2's MAC. ++check ovn-nbctl --wait=hv sync + ovn-sbctl dump-flows lr0 > lr0-flows4 + AT_CAPTURE_FILE([lr0-flows4]) + AT_CHECK([grep lr_in_arp_resolve lr0-flows4 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) + ]) + ++# hv2 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_present hv2 ++ ++# hv1 should not have the above flows. ++check_virtual_offlows_not_present hv1 ++ + # Now send arp reply from sw0-p1. hv1 should claim sw0-vir + # and sw0-p1 shpuld be its virtual_parent. + eth_src=505400000003 +@@ -16652,12 +17067,22 @@ sleep 1 AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = xsw0-p1]) @@ -16943,7 +23283,21 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-sbctl dump-flows lr0 > lr0-flows5 AT_CAPTURE_FILE([lr0-flows5]) AT_CHECK([grep lr_in_arp_resolve lr0-flows5 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl -@@ -16668,6 +16868,8 @@ sleep 1 + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) + ]) + ++check ovn-nbctl --wait=hv sync ++# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. 
++check_virtual_offlows_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ + # Delete hv1-vif1 port. hv1 should release sw0-vir + as hv1 ovs-vsctl del-port hv1-vif1 + +@@ -16668,6 +17093,8 @@ sleep 1 AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = x]) @@ -16952,7 +23306,23 @@ index 2e0bc9c53..bd59c0a77 100644 # Since the sw0-vir is not claimed by any chassis, eth.dst should be set to # zero if the ip4.dst is the virtual ip. ovn-sbctl dump-flows lr0 > lr0-flows6 -@@ -16691,6 +16893,8 @@ sleep 1 +@@ -16676,6 +17103,15 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows6 | grep "reg0 == 10.0.0.10" | sed 's/ + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) + ]) + ++check ovn-nbctl --wait=hv sync ++# hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_not_present hv1 ++ ++# hv2 should not have the above flows. ++check_virtual_offlows_not_present hv2 ++ ++ + # Now send arp reply from sw0-p2. hv2 should claim sw0-vir + # and sw0-p2 should be its virtual_parent. 
+ eth_src=505400000004 +@@ -16691,12 +17127,22 @@ sleep 1 AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = xsw0-p2]) @@ -16961,7 +23331,21 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-sbctl dump-flows lr0 > lr0-flows7 AT_CAPTURE_FILE([lr0-flows7]) AT_CHECK([grep lr_in_arp_resolve lr0-flows7 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl -@@ -16705,6 +16909,8 @@ logical_port=sw0-vir) = x], [0], []) + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) + ]) + ++check ovn-nbctl --wait=hv sync ++# hv2 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_present hv2 ++ ++# hv1 should not have the above flows. ++check_virtual_offlows_not_present hv1 ++ + # Delete sw0-p2 logical port + ovn-nbctl lsp-del sw0-p2 + +@@ -16705,6 +17151,8 @@ logical_port=sw0-vir) = x], [0], []) AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ logical_port=sw0-vir) = x]) @@ -16970,7 +23354,61 @@ index 2e0bc9c53..bd59c0a77 100644 # Clear virtual_ip column of sw0-vir. There should be no bind_vport flows. ovn-nbctl --wait=hv remove logical_switch_port sw0-vir options virtual-ip -@@ -16807,22 +17013,22 @@ ovs-vsctl -- add-port br-int vif33 -- \ +@@ -16722,6 +17170,14 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows3 | grep bind_vport | sed 's/table=../tabl + table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p3" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) + ]) + ++check ovn-nbctl --wait=hv sync ++# hv2 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and ++# arp responder flow in lr0 pipeline. ++check_virtual_offlows_not_present hv2 ++ ++# hv1 should not have the above flows. 
++check_virtual_offlows_not_present hv2 ++ + ovn-nbctl --wait=hv remove logical_switch_port sw0-vir options virtual-parents + ovn-sbctl dump-flows sw0 > sw0-flows4 + AT_CAPTURE_FILE([sw0-flows4]) +@@ -16731,6 +17187,38 @@ ovn-sbctl dump-flows lr0 > lr0-flows8 + AT_CAPTURE_FILE([lr0-flows8]) + AT_CHECK([grep lr_in_arp_resolve lr0-flows8 | grep "reg0 == 10.0.0.10"], [1]) + ++# Delete sw0-vir and add again. ++ovn-nbctl lsp-del sw0-vir ++ ++ovn-nbctl lsp-add sw0 sw0-vir ++ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" ++ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" ++ovn-nbctl lsp-set-type sw0-vir virtual ++ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 ++ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 ++ ++ovn-nbctl --wait=hv sync ++ ++# Check that logical flows are added for sw0-vir in lsp_in_arp_rsp pipeline ++# with bind_vport action. ++ ++ovn-sbctl dump-flows sw0 > sw0-flows ++AT_CAPTURE_FILE([sw0-flows]) ++ ++AT_CHECK([grep ls_in_arp_rsp sw0-flows | grep bind_vport | sed 's/table=../table=??/' | sort], [0], [dnl ++ table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p1" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) ++ table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p3" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) ++]) ++ ++ovn-sbctl dump-flows lr0 > lr0-flows ++AT_CAPTURE_FILE([lr0-flows]) ++ ++# Since the sw0-vir is not claimed by any chassis, eth.dst should be set to ++# zero if the ip4.dst is the virtual ip in the router pipeline. 
++AT_CHECK([grep lr_in_arp_resolve lr0-flows | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl ++ table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) ++]) ++ + OVN_CLEANUP([hv1], [hv2]) + AT_CLEANUP + +@@ -16807,22 +17295,22 @@ ovs-vsctl -- add-port br-int vif33 -- \ options:rxq_pcap=hv$i/vif33-rx.pcap \ ofport-request=33 @@ -16997,7 +23435,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync ovn-sbctl lflow-list > sbflows AT_CAPTURE_FILE([sbflows]) -@@ -16889,6 +17095,8 @@ AT_CHECK_UNQUOTED([ovn-sbctl get controller_event $uuid event_info:load_balancer +@@ -16889,6 +17377,8 @@ AT_CHECK_UNQUOTED([ovn-sbctl get controller_event $uuid event_info:load_balancer "$uuid_lb2" ]) @@ -17006,15 +23444,97 @@ index 2e0bc9c53..bd59c0a77 100644 OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP -@@ -17159,6 +17367,7 @@ AT_CAPTURE_FILE([sbflows3]) - cp ovn-sb/ovn-sb.db ovn-sb3.db +@@ -17108,6 +17598,27 @@ check ovs-vsctl -- add-port br-int hv2-vif4 -- \ + ofport-request=1 + ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys + ++AT_CAPTURE_FILE([exp]) ++AT_CAPTURE_FILE([rcv]) ++check_packets() { ++ > exp ++ > rcv ++ if test "$1" = --uniq; then ++ sort="sort -u"; shift ++ else ++ sort=sort ++ fi ++ for tuple in "$@"; do ++ set $tuple; pcap=$1 type=$2 ++ echo "--- $pcap" | tee -a exp >> rcv ++ $sort "$type" >> exp ++ $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $pcap | $sort >> rcv ++ echo | tee -a exp >> rcv ++ done ++ ++ $at_diff exp rcv >/dev/null ++} ++ + OVN_POPULATE_ARP + + # Enable IGMP snooping on sw1. 
+@@ -17124,21 +17635,16 @@ ovn-sbctl dump-flows > sbflows + AT_CAPTURE_FILE([expected]) + AT_CAPTURE_FILE([received]) + > expected +-> received +-for i in 1 2; do +- for j in 1 2; do +- pcap=hv$i/vif$j-tx.pcap +- echo "--- $pcap" | tee -a expected >> received +- $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $pcap | sort >> received +- echo | tee -a expected >> received +- done +-done +-check $at_diff -F'^---' expected received ++OVS_WAIT_UNTIL( ++ [check_packets 'hv1/vif1-tx.pcap expected' \ ++ 'hv1/vif2-tx.pcap expected' \ ++ 'hv2/vif1-tx.pcap expected' \ ++ 'hv2/vif2-tx.pcap expected'], ++ [$at_diff -F'^---' exp rcv]) + + check ovn-nbctl --wait=hv sync + + AT_CAPTURE_FILE([sbflows2]) +-cp ovn-sb/ovn-sb.db ovn-sb2.db + ovn-sbctl dump-flows > sbflows2 + + # Inject IGMP Join for 239.0.1.68 on sw1-p11. +@@ -17156,9 +17662,9 @@ wait_row_count IGMP_Group 2 address=239.0.1.68 + check ovn-nbctl --wait=hv sync + + AT_CAPTURE_FILE([sbflows3]) +-cp ovn-sb/ovn-sb.db ovn-sb3.db ovn-sbctl dump-flows > sbflows3 +AS_BOX([IGMP traffic test 1]) # Send traffic and make sure it gets forwarded only on the two ports that # joined. 
> expected -@@ -17207,6 +17416,7 @@ send_igmp_v3_report hv1-vif1 hv1 \ +@@ -17172,22 +17678,6 @@ store_ip_multicast_pkt \ + $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \ + e518e518000a3b3a0000 expected + +-AT_CAPTURE_FILE([exp]) +-AT_CAPTURE_FILE([rcv]) +-check_packets() { +- > exp +- > rcv +- for tuple in "$@"; do +- set $tuple; pcap=$1 type=$2 +- echo "--- $pcap" | tee -a exp >> rcv +- sort "$type" >> exp +- $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $pcap | sort >> rcv +- echo | tee -a exp >> rcv +- done +- +- $at_diff exp rcv >/dev/null +-} +- + OVS_WAIT_UNTIL( + [check_packets 'hv1/vif1-tx.pcap expected' \ + 'hv2/vif1-tx.pcap expected' \ +@@ -17207,6 +17697,7 @@ send_igmp_v3_report hv1-vif1 hv1 \ wait_row_count IGMP_Group 1 address=239.0.1.68 check ovn-nbctl --wait=hv sync @@ -17022,7 +23542,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send traffic and make sure it gets forwarded only on the port that joined. as hv1 reset_pcap_file hv1-vif1 hv1/vif1 as hv2 reset_pcap_file hv2-vif1 hv2/vif1 -@@ -17246,6 +17456,7 @@ send_igmp_v3_report hv1-vif1 hv1 \ +@@ -17246,6 +17737,7 @@ send_igmp_v3_report hv1-vif1 hv1 \ # Check that the IGMP Group is learned. wait_row_count IGMP_Group 1 address=224.0.0.42 @@ -17030,15 +23550,40 @@ index 2e0bc9c53..bd59c0a77 100644 # Send traffic and make sure it gets flooded to all ports. as hv1 reset_pcap_file hv1-vif1 hv1/vif1 as hv1 reset_pcap_file hv1-vif2 hv1/vif2 -@@ -17275,6 +17486,7 @@ check ovn-nbctl set Logical_Switch sw2 \ +@@ -17275,15 +17767,27 @@ check ovn-nbctl set Logical_Switch sw2 \ other_config:mcast_eth_src="00:00:00:00:02:fe" \ other_config:mcast_ip4_src="20.0.0.254" +-# Wait for 1 query interval (1 sec) and check that two queries are generated. +AS_BOX([IGMP traffic test 4]) - # Wait for 1 query interval (1 sec) and check that two queries are generated. ++# Check that multiple queries are generated over time. 
> expected store_igmp_v3_query 0000000002fe $(ip_to_hex 20 0 0 254) 84dd expected -@@ -17296,6 +17508,7 @@ check ovn-nbctl set Logical_Switch sw3 \ + store_igmp_v3_query 0000000002fe $(ip_to_hex 20 0 0 254) 84dd expected + +-OVS_WAIT_UNTIL( +- [check_packets 'hv1/vif3-tx.pcap expected' \ +- 'hv2/vif3-tx.pcap expected'], +- [$at_diff -F'^---' exp rcv]) ++for count in 1 2 3; do ++ as hv1 reset_pcap_file hv1-vif1 hv1/vif1 ++ as hv1 reset_pcap_file hv1-vif2 hv1/vif2 ++ as hv1 reset_pcap_file hv1-vif3 hv1/vif3 ++ as hv1 reset_pcap_file hv1-vif4 hv1/vif4 ++ as hv2 reset_pcap_file hv2-vif1 hv2/vif1 ++ as hv2 reset_pcap_file hv2-vif2 hv2/vif2 ++ as hv2 reset_pcap_file hv2-vif3 hv2/vif3 ++ as hv2 reset_pcap_file hv2-vif4 hv2/vif4 ++ OVS_WAIT_UNTIL( ++ [check_packets --uniq \ ++ 'hv1/vif3-tx.pcap expected' \ ++ 'hv2/vif3-tx.pcap expected'], ++ [$at_diff -F'^---' exp rcv]) ++done + + # Disable IGMP querier on sw2. + check ovn-nbctl set Logical_Switch sw2 \ +@@ -17296,6 +17800,7 @@ check ovn-nbctl set Logical_Switch sw3 \ check ovn-nbctl --wait=hv sync @@ -17046,7 +23591,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send traffic from sw3 and make sure rtr doesn't relay it. > expected_empty -@@ -17345,6 +17558,7 @@ send_igmp_v3_report hv2-vif3 hv2 \ +@@ -17345,6 +17850,7 @@ send_igmp_v3_report hv2-vif3 hv2 \ wait_row_count IGMP_Group 2 address=239.0.1.68 check ovn-nbctl --wait=hv sync @@ -17054,7 +23599,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send traffic from sw3 and make sure it is relayed by rtr. # to ports that joined. > expected_routed_sw1 -@@ -17394,6 +17608,7 @@ send_igmp_v3_report hv1-vif4 hv1 \ +@@ -17394,6 +17900,7 @@ send_igmp_v3_report hv1-vif4 hv1 \ wait_row_count IGMP_Group 3 address=239.0.1.68 check ovn-nbctl --wait=hv sync @@ -17062,7 +23607,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send traffic from sw3 and make sure it is relayed by rtr # to ports that joined. 
> expected_routed_sw1 -@@ -17493,6 +17708,7 @@ send_igmp_v3_report hv1-vif2 hv1 \ +@@ -17493,6 +18000,7 @@ send_igmp_v3_report hv1-vif2 hv1 \ wait_row_count IGMP_Group 1 address=239.0.1.68 check ovn-nbctl --wait=hv sync @@ -17070,7 +23615,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Send traffic from sw1-p21 send_ip_multicast_pkt hv2-vif1 hv2 \ 000000000001 01005e000144 \ -@@ -17790,6 +18006,7 @@ check ovs-vsctl -- add-port br-int hv2-vif4 -- \ +@@ -17790,6 +18298,7 @@ check ovs-vsctl -- add-port br-int hv2-vif4 -- \ ofport-request=1 check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys @@ -17078,7 +23623,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync AT_CAPTURE_FILE([sbflows]) -@@ -18470,6 +18687,7 @@ m4_define([DVR_N_S_ARP_HANDLING], +@@ -18470,6 +18979,7 @@ m4_define([DVR_N_S_ARP_HANDLING], # Set a hypervisor as gateway chassis, for router port 172.31.0.1 ovn-nbctl lrp-set-gateway-chassis router-to-underlay hv3 @@ -17086,7 +23631,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync wait_row_count Port_Binding 1 logical_port=cr-router-to-underlay -@@ -18689,6 +18907,7 @@ m4_define([DVR_N_S_PING], +@@ -18689,6 +19199,7 @@ m4_define([DVR_N_S_PING], ovn-nbctl lrp-set-gateway-chassis router-to-underlay hv3 ovn-nbctl lrp-set-redirect-type router-to-underlay bridged @@ -17094,7 +23639,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync -@@ -18816,7 +19035,7 @@ m4_define([DVR_N_S_PING], +@@ -18816,7 +19327,7 @@ m4_define([DVR_N_S_PING], OVN_CHECK_PACKETS_REMOVE_BROADCAST([hv4/vif-north-tx.pcap], [vif-north.expected]) # Confirm that packets did not go out via tunnel port. 
@@ -17103,7 +23648,7 @@ index 2e0bc9c53..bd59c0a77 100644 ]]) # Confirm that packet went out via localnet port -@@ -18919,6 +19138,7 @@ ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 +@@ -18919,6 +19430,7 @@ ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 OVN_POPULATE_ARP @@ -17111,7 +23656,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync as hv1 ovs-appctl -t ovn-controller vlog/set dbg -@@ -18945,7 +19165,8 @@ list mac_binding], [0], [lr0-sw0 +@@ -18945,7 +19457,8 @@ list mac_binding], [0], [lr0-sw0 50:54:00:00:00:03 ]) @@ -17121,7 +23666,7 @@ index 2e0bc9c53..bd59c0a77 100644 AT_CHECK([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=10 | grep arp | \ grep controller | grep -v n_packets=0 | wc -l`]) -@@ -18962,7 +19183,8 @@ OVS_WAIT_UNTIL([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep n_p +@@ -18962,7 +19475,8 @@ OVS_WAIT_UNTIL([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep n_p # The packet should not be sent to ovn-controller. The packet # count should be 1 only. @@ -17131,7 +23676,7 @@ index 2e0bc9c53..bd59c0a77 100644 AT_CHECK([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=10 | grep arp | \ grep controller | grep -v n_packets=0 | wc -l`]) -@@ -18975,7 +19197,8 @@ send_garp 1 1 $eth_src $eth_dst $spa $tpa +@@ -18975,7 +19489,8 @@ send_garp 1 1 $eth_src $eth_dst $spa $tpa # The garp packet should be sent to ovn-controller and the mac_binding entry # should be updated. @@ -17141,7 +23686,7 @@ index 2e0bc9c53..bd59c0a77 100644 check_row_count MAC_Binding 1 -@@ -19000,7 +19223,8 @@ send_garp 1 1 $eth_src $eth_dst $spa $tpa +@@ -19000,7 +19515,8 @@ send_garp 1 1 $eth_src $eth_dst $spa $tpa # The garp packet should be sent to ovn-controller and the mac_binding entry # should be updated. 
@@ -17151,7 +23696,7 @@ index 2e0bc9c53..bd59c0a77 100644 OVS_WAIT_UNTIL( [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:33 \ -@@ -19021,7 +19245,8 @@ OVS_WAIT_UNTIL( +@@ -19021,7 +19537,8 @@ OVS_WAIT_UNTIL( | grep n_packets=1 | wc -l`] ) @@ -17161,7 +23706,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Now send ARP reply packet with IP - 10.0.0.40 and mac 505400000023 eth_src=505400000023 -@@ -19038,7 +19263,8 @@ send_arp_reply 1 1 $eth_src $eth_dst $spa $tpa +@@ -19038,7 +19555,8 @@ send_arp_reply 1 1 $eth_src $eth_dst $spa $tpa # The garp packet should be sent to ovn-controller and the mac_binding entry # should be updated. @@ -17171,7 +23716,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Wait for an entry in table=67 for the learnt mac_binding entry. -@@ -19054,7 +19280,8 @@ OVS_WAIT_UNTIL( +@@ -19054,7 +19572,8 @@ OVS_WAIT_UNTIL( | grep n_packets=1 | wc -l`] ) @@ -17181,7 +23726,7 @@ index 2e0bc9c53..bd59c0a77 100644 send_arp_reply 1 1 $eth_src $eth_dst $spa $tpa OVS_WAIT_UNTIL( -@@ -19062,7 +19289,8 @@ OVS_WAIT_UNTIL( +@@ -19062,7 +19581,8 @@ OVS_WAIT_UNTIL( | grep n_packets=2 | wc -l`] ) @@ -17191,7 +23736,7 @@ index 2e0bc9c53..bd59c0a77 100644 OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP -@@ -19100,8 +19328,7 @@ ovn-nbctl lsp-add ls1 lp11 +@@ -19100,8 +19620,7 @@ ovn-nbctl lsp-add ls1 lp11 ovn-nbctl lsp-set-addresses lp11 "f0:00:00:00:00:11" ovn-nbctl lsp-set-port-security lp11 f0:00:00:00:00:11 @@ -17201,7 +23746,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync ovn-nbctl show -@@ -19270,6 +19497,7 @@ ovn-nbctl lrp-set-gateway-chassis router-to-underlay hv3 +@@ -19270,6 +19789,7 @@ ovn-nbctl lrp-set-gateway-chassis router-to-underlay hv3 ovn-nbctl --stateless lr-nat-add router dnat_and_snat 172.31.0.100 192.168.1.1 ovn-nbctl lrp-set-redirect-type router-to-underlay bridged @@ -17209,7 +23754,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync -@@ -19534,6 +19762,7 @@ check ovn-nbctl lsp-set-options ln-public 
network_name=public +@@ -19534,6 +20054,7 @@ check ovn-nbctl lsp-set-options ln-public network_name=public check ovn-nbctl --wait=hv lrp-set-gateway-chassis lr0-public hv1 20 OVN_POPULATE_ARP @@ -17217,16 +23762,23 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync wait_row_count Service_Monitor 2 -@@ -19542,7 +19771,7 @@ AT_CAPTURE_FILE([sbflows]) +@@ -19542,7 +20063,14 @@ AT_CAPTURE_FILE([sbflows]) OVS_WAIT_FOR_OUTPUT( [ovn-sbctl dump-flows > sbflows ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | sed 's/table=..//'], 0, - [ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) -+ [ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) ++ [dnl ++ (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && sctp), action=(reg1 = ip4.dst; reg2[[0..15]] = sctp.dst; ct_lb;) ++ (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && tcp), action=(reg1 = ip4.dst; reg2[[0..15]] = tcp.dst; ct_lb;) ++ (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4 && udp), action=(reg1 = ip4.dst; reg2[[0..15]] = udp.dst; ct_lb;) ++ (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && sctp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = sctp.dst; ct_lb;) ++ (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && tcp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = tcp.dst; ct_lb;) ++ (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip6 && udp), action=(xxreg1 = ip6.dst; reg2[[0..15]] = udp.dst; ct_lb;) ++ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb(backends=10.0.0.3:80,20.0.0.3:80; 
hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) ]) AT_CAPTURE_FILE([sbflows2]) -@@ -19722,6 +19951,7 @@ ovn-nbctl lsp-set-options ln-public network_name=public +@@ -19722,6 +20250,7 @@ ovn-nbctl lsp-set-options ln-public network_name=public ovn-nbctl --wait=hv lrp-set-gateway-chassis lr0-public hv1 20 OVN_POPULATE_ARP @@ -17234,7 +23786,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync # And now for the anticlimax. We need to ensure that there is no -@@ -19861,6 +20091,7 @@ check ovs-vsctl -- add-port br-int hv1-vif2 -- \ +@@ -19861,6 +20390,7 @@ check ovs-vsctl -- add-port br-int hv1-vif2 -- \ ofport-request=3 OVN_POPULATE_ARP @@ -17242,7 +23794,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync ovn-sbctl dump-flows > sbflows -@@ -20216,6 +20447,7 @@ ovn-nbctl lsp-add lsw0 lp1 +@@ -20216,6 +20746,7 @@ ovn-nbctl lsp-add lsw0 lp1 ovn-nbctl lsp-set-addresses lp1 "f0:00:00:00:00:01 10.0.0.1" ovn-nbctl acl-add lsw0 from-lport 1000 'eth.type == 0x1234' drop @@ -17250,7 +23802,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync # Trace with --ovs should see ovs flow related to the ACL -@@ -20310,6 +20542,7 @@ for az in `seq 1 $n_az`; do +@@ -20310,6 +20841,7 @@ for az in `seq 1 $n_az`; do done check ovn-nbctl --wait=hv sync ovn-sbctl list Port_Binding > az$az.ports @@ -17258,7 +23810,7 @@ index 2e0bc9c53..bd59c0a77 100644 done # Pre-populate the hypervisors' ARP tables so that we don't lose any -@@ -20485,6 +20718,7 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ +@@ -20485,6 +21017,7 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \ # wait for earlier changes to take effect check ovn-nbctl --wait=hv sync @@ -17266,7 +23818,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-sbctl dump-flows > sbflows AT_CAPTURE_FILE([sbflows]) -@@ -20672,8 +20906,9 @@ build_tcp_syn() { +@@ -20672,8 +21205,9 @@ build_tcp_syn() { send_ipv4_pkt() { local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 @@ -17278,7 +23830,7 @@ index 2e0bc9c53..bd59c0a77 100644 local 
hp_l4_payload=${11} local outfile=${12} -@@ -20681,8 +20916,10 @@ send_ipv4_pkt() { +@@ -20681,8 +21215,10 @@ send_ipv4_pkt() { local eth=${eth_dst}${eth_src}0800 local hp_eth=${eth_src}${eth_dst}0800 @@ -17291,7 +23843,7 @@ index 2e0bc9c53..bd59c0a77 100644 local packet=${eth}${ip}${l4_payload} local hp_packet=${hp_eth}${hp_ip}${hp_l4_payload} -@@ -20694,15 +20931,16 @@ send_ipv6_pkt() { +@@ -20694,15 +21230,16 @@ send_ipv6_pkt() { local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 local ip_src=$5 ip_dst=$6 ip_proto=$7 ip_len=$8 local l4_payload=$9 @@ -17311,7 +23863,7 @@ index 2e0bc9c53..bd59c0a77 100644 local packet=${eth}${ip}${l4_payload} local hp_packet=${hp_eth}${hp_ip}${hp_l4_payload} -@@ -20724,16 +20962,26 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ +@@ -20724,16 +21261,26 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ # One logical switch with IPv4 and IPv6 load balancers that hairpin the # traffic. @@ -17342,7 +23894,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl lr-add rtr ovn-nbctl lrp-add rtr rtr-sw 00:00:00:00:01:00 42.42.42.254/24 4200::00ff/64 -@@ -20743,67 +20991,332 @@ ovn-nbctl lsp-add sw sw-rtr \ +@@ -20743,59 +21290,324 @@ ovn-nbctl lsp-add sw sw-rtr \ -- lsp-set-options sw-rtr router-port=rtr-sw ovn-nbctl --wait=hv sync @@ -17375,12 +23927,15 @@ index 2e0bc9c53..bd59c0a77 100644 + $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 89) \ + 06 0028 \ + ${tcp_payload} \ -+ $(ip_to_hex 88 88 88 89) ${hp_tcp_payload} \ -+ expected -+ -+# Check that traffic is hairpinned. -+OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) -+ ++ $(ip_to_hex 88 88 88 89) ${hp_tcp_payload} \ ++ expected + + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + +-# Inject IPv4 UDP packet from lsp. +-udp_payload=$(build_udp 84d0 0fc8 6666) +-hp_udp_payload=$(build_udp 84d0 07e5 6e49) +# Check learned hairpin reply flows. 
+OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl + table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] @@ -17395,8 +23950,10 @@ index 2e0bc9c53..bd59c0a77 100644 +# Inject IPv4 TCP packets from lsp. +tcp_payload=$(build_tcp_syn 84d0 1f90 05a7) +hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 156f) -+send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ -+ $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ + send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ +- 11 001e 35f4 \ +- ${udp_payload} ${hp_udp_payload} \ + 06 0028 \ + ${tcp_payload} \ + $(ip_to_hex 88 88 88 87) ${hp_tcp_payload} \ @@ -17409,12 +23966,18 @@ index 2e0bc9c53..bd59c0a77 100644 + 06 0028 \ + ${tcp_payload} \ + $(ip_to_hex 88 88 88 89) ${hp_tcp_payload} \ -+ expected + expected # Check that traffic is hairpinned. OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) --# Inject IPv4 UDP packet from lsp. +-# Inject IPv6 TCP packet from lsp. +-tcp_payload=$(build_tcp_syn 84d0 1f90 3ff9) +-hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc0) +-send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ +- 42000000000000000000000000000001 88000000000000000000000000000088 \ +- 06 0014 \ +- ${tcp_payload} ${hp_tcp_payload} \ +# Check learned hairpin reply flows. +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl + table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] @@ -17424,12 +23987,10 @@ index 2e0bc9c53..bd59c0a77 100644 +AS_BOX([IPv4 UDP Hairpin]) + +# Inject IPv4 UDP packets from lsp. 
- udp_payload=$(build_udp 84d0 0fc8 6666) - hp_udp_payload=$(build_udp 84d0 07e5 6e49) - send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ - $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ -- 11 001e 35f4 \ -- ${udp_payload} ${hp_udp_payload} \ ++udp_payload=$(build_udp 84d0 0fc8 6666) ++hp_udp_payload=$(build_udp 84d0 07e5 6e49) ++send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ ++ $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ + 11 001e \ + ${udp_payload} \ + $(ip_to_hex 88 88 88 88) ${hp_udp_payload} \ @@ -17442,11 +24003,12 @@ index 2e0bc9c53..bd59c0a77 100644 + 11 001e \ + ${udp_payload} \ + $(ip_to_hex 88 88 88 89) ${hp_udp_payload} \ -+ expected -+ -+# Check that traffic is hairpinned. -+OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) -+ + expected + + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + +-# Inject IPv6 UDP packet from lsp. +# Check learned hairpin reply flows. +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl + table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] @@ -17476,12 +24038,11 @@ index 2e0bc9c53..bd59c0a77 100644 + 11 001e \ + ${udp_payload} \ + $(ip_to_hex 88 88 88 89) ${hp_udp_payload} \ - expected - - # Check that traffic is hairpinned. - OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) - --# Inject IPv6 TCP packet from lsp. ++ expected ++ ++# Check that traffic is hairpinned. ++OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) ++ +# Check learned hairpin reply flows. +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl + table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] @@ -17493,39 +24054,27 @@ index 2e0bc9c53..bd59c0a77 100644 +AS_BOX([IPv6 TCP Hairpin]) + +# Inject IPv6 TCP packets from lsp. 
- tcp_payload=$(build_tcp_syn 84d0 1f90 3ff9) - hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc0) - send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ - 42000000000000000000000000000001 88000000000000000000000000000088 \ - 06 0014 \ -- ${tcp_payload} ${hp_tcp_payload} \ ++tcp_payload=$(build_tcp_syn 84d0 1f90 3ff9) ++hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc0) ++send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ ++ 42000000000000000000000000000001 88000000000000000000000000000088 \ ++ 06 0014 \ + ${tcp_payload} \ + 88000000000000000000000000000088 ${hp_tcp_payload} \ - expected - --# Check that traffic is hairpinned. --OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) -- --# Inject IPv6 UDP packet from lsp. --udp_payload=$(build_udp 84d0 0fc8 a0b8) --hp_udp_payload=$(build_udp 84d0 07e5 a89b) ++ expected ++ +tcp_payload=$(build_tcp_syn 84d1 1f90 3ff7) +hp_tcp_payload=$(build_tcp_syn 84d1 0fc9 4fbe) - send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ -- 42000000000000000000000000000001 88000000000000000000000000000088 \ -- 11 000a \ -- ${udp_payload} ${hp_udp_payload} \ ++send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + 42000000000000000000000000000001 88000000000000000000000000000089 \ + 06 0014 \ + ${tcp_payload} \ + 88000000000000000000000000000089 ${hp_tcp_payload} \ - expected - - # Check that traffic is hairpinned. - OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) - --OVN_CLEANUP([hv1]) --AT_CLEANUP ++ expected ++ ++# Check that traffic is hairpinned. ++OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) ++ +# Check learned hairpin reply flows. 
+OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl + table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] @@ -17535,16 +24084,12 @@ index 2e0bc9c53..bd59c0a77 100644 + table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] + table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.89,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +]) - --AT_SETUP([ovn -- Big Load Balancer]) --ovn_start ++ +# Change LB Hairpin SNAT IP. +# Also flush conntrack to avoid reusing an existing entry. +as hv1 ovs-appctl dpctl/flush-conntrack +ovn-nbctl --wait=hv set load_balancer lb-ipv6-tcp options:hairpin_snat_ip="8800::0087" - --ovn-nbctl ls-add ls1 --ovn-nbctl lsp-add ls1 lsp1 ++ +# Inject IPv6 TCP packets from lsp. +tcp_payload=$(build_tcp_syn 84d0 1f90 3ff9) +hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc1) @@ -17580,11 +24125,12 @@ index 2e0bc9c53..bd59c0a77 100644 +AS_BOX([IPv6 UDP Hairpin]) + +# Inject IPv6 UDP packets from lsp. -+udp_payload=$(build_udp 84d0 0fc8 a0b8) -+hp_udp_payload=$(build_udp 84d0 07e5 a89b) -+send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ -+ 42000000000000000000000000000001 88000000000000000000000000000088 \ -+ 11 000a \ + udp_payload=$(build_udp 84d0 0fc8 a0b8) + hp_udp_payload=$(build_udp 84d0 07e5 a89b) + send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + 42000000000000000000000000000001 88000000000000000000000000000088 \ + 11 000a \ +- ${udp_payload} ${hp_udp_payload} \ + ${udp_payload} \ + 88000000000000000000000000000088 ${hp_udp_payload} \ + expected @@ -17596,11 +24142,11 @@ index 2e0bc9c53..bd59c0a77 100644 + 11 000a \ + ${udp_payload} \ + 88000000000000000000000000000089 ${hp_udp_payload} \ -+ expected -+ -+# Check that traffic is hairpinned. -+OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) -+ + expected + + # Check that traffic is hairpinned. 
+ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + +# Check learned hairpin reply flows. +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl + table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] @@ -17689,18 +24235,10 @@ index 2e0bc9c53..bd59c0a77 100644 +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [1], [dnl +]) + -+OVN_CLEANUP([hv1]) -+AT_CLEANUP -+ -+AT_SETUP([ovn -- Big Load Balancer]) -+ovn_start -+ -+ovn-nbctl ls-add ls1 -+ovn-nbctl lsp-add ls1 lsp1 + OVN_CLEANUP([hv1]) + AT_CLEANUP - net_add n1 - sim_add hv1 -@@ -20936,6 +21449,7 @@ check ovn-nbctl lsp-set-options ln-public network_name=public +@@ -20936,6 +21748,7 @@ check ovn-nbctl lsp-set-options ln-public network_name=public check ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20 check ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24 check ovn-nbctl --wait=hv sync @@ -17708,7 +24246,7 @@ index 2e0bc9c53..bd59c0a77 100644 wait_row_count datapath_binding 1 external-ids:name=lr0 lr0_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding lr0) -@@ -21156,31 +21670,31 @@ AT_CHECK([ +@@ -21156,31 +21969,31 @@ AT_CHECK([ AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl table=12(lr_in_policy ), priority=1001 , dnl @@ -17745,7 +24283,7 @@ index 2e0bc9c53..bd59c0a77 100644 ]) OVN_CLEANUP([hv1]) -@@ -21602,22 +22116,22 @@ AT_CHECK([test ! -z $p1_zoneid]) +@@ -21602,22 +22415,22 @@ AT_CHECK([test ! -z $p1_zoneid]) p2_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p2 | sed 's/"//g') AT_CHECK([test ! 
-z $p2_zoneid]) @@ -17773,7 +24311,7 @@ index 2e0bc9c53..bd59c0a77 100644 reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0]) p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g') -@@ -21629,16 +22143,16 @@ OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p1) = xup]) +@@ -21629,16 +22442,16 @@ OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p1) = xup]) p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g') AT_CHECK([test ! -z $p1_zoneid]) @@ -17793,7 +24331,7 @@ index 2e0bc9c53..bd59c0a77 100644 reg15=0x${p2_dpkey} | grep REG13 | wc -l) -eq 0]) p2_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p2 | sed 's/"//g') -@@ -21646,7 +22160,7 @@ AT_CHECK([test -z $p2_zoneid]) +@@ -21646,7 +22459,7 @@ AT_CHECK([test -z $p2_zoneid]) ovn-nbctl lsp-del sw0-p1 @@ -17802,7 +24340,7 @@ index 2e0bc9c53..bd59c0a77 100644 reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0]) p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g') -@@ -21723,6 +22237,7 @@ check ovn-nbctl --policy="src-ip" lr-route-add DR 10.0.0.0/24 20.0.0.2 +@@ -21723,6 +22536,7 @@ check ovn-nbctl --policy="src-ip" lr-route-add DR 10.0.0.0/24 20.0.0.2 check ovn-nbctl --ecmp-symmetric-reply --policy="src-ip" lr-route-add GW 10.0.0.0/24 172.16.0.2 check ovn-nbctl --ecmp-symmetric-reply --policy="src-ip" lr-route-add GW 10.0.0.0/24 172.16.0.3 @@ -17810,7 +24348,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync # Ensure ECMP symmetric reply flows are not present on any hypervisor. 
-@@ -21753,26 +22268,25 @@ ovn-nbctl set Logical_Router $gw_uuid options:chassis=hv1 +@@ -21753,26 +22567,25 @@ ovn-nbctl set Logical_Router $gw_uuid options:chassis=hv1 ovn-nbctl --wait=hv sync # And ensure that ECMP symmetric reply flows are present only on hv1 @@ -17854,7 +24392,7 @@ index 2e0bc9c53..bd59c0a77 100644 ]) OVN_CLEANUP([hv1], [hv2]) -@@ -21856,6 +22370,7 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ +@@ -21856,6 +22669,7 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- \ # for ARP resolution). OVN_POPULATE_ARP @@ -17862,7 +24400,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync AT_CHECK([ovn-sbctl lflow-list | grep lr_in_arp_resolve | grep 10.0.0.1], [1], []) -@@ -21895,22 +22410,22 @@ as hv1 +@@ -21895,22 +22709,22 @@ as hv1 ovs-vsctl add-br br-phys ovn_attach n1 br-phys 192.168.0.1 @@ -17898,7 +24436,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv1 ovs-vsctl -- add-port br-int hv1-vif1 -- \ -@@ -21934,88 +22449,101 @@ ovs-vsctl -- add-port br-int hv1-vif4 -- \ +@@ -21934,93 +22748,186 @@ ovs-vsctl -- add-port br-int hv1-vif4 -- \ options:rxq_pcap=hv1/vif4-rx.pcap \ ofport-request=4 @@ -17916,10 +24454,60 @@ index 2e0bc9c53..bd59c0a77 100644 +check ovn-nbctl --wait=hv sync -OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +- +-# Add sw0-p3 to the port group pg0. The conj_id should be 2. +-ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 +-OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +- +-# Add sw0p4 to the port group pg0. The conj_id should be 2. +-ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 sw0-p4 +-OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +- +-# Add another ACL with conjunction. 
+-ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow +-OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=2")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) +- +-# Delete tcp ACL. +-ovn-nbctl acl-del pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" +-OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) +- +-# Add back the tcp ACL. +-ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow +-OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=4")]) +- +-ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && inport == @pg0 && ip4 && tcp.dst >= 84 && tcp.dst <= 86" allow +-OVS_WAIT_UNTIL([test 3 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=4")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=5")]) +- +-ovn-nbctl clear port_group pg0 acls +-OVS_WAIT_UNTIL([test 0 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +- +-ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow +-ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == 
@pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow +-OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=6")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=7")]) +- +-# Flush the lflow cache. +-as hv1 ovn-appctl -t ovn-controller flush-lflow-cache +-OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=3")]) +- +-# Disable lflow caching. +# wait_conj_id_count COUNT ["ID COUNT [MATCH]"]... +# +# Waits until COUNT flows matching against conj_id appear in the -+# table 45 on hv1's br-int bridge. Makes the flows available in ++# table 44 on hv1's br-int bridge. Makes the flows available in +# "hv1flows", which will be logged on error. +# +# In addition, for each quoted "ID COUNT" or "ID COUNT MATCH", @@ -17936,7 +24524,7 @@ index 2e0bc9c53..bd59c0a77 100644 + echo "waiting for $1 conj_id flows..." + OVS_WAIT_FOR_OUTPUT_UNQUOTED( + [ovs-ofctl dump-flows br-int > hv1flows -+ grep table=45 hv1flows | grep -c conj_id], ++ grep table=44 hv1flows | grep -c conj_id], + [$retval], [$1 +]) + @@ -17945,112 +24533,74 @@ index 2e0bc9c53..bd59c0a77 100644 + set -- $arg; id=$1 count=$2 match=$3 + echo "checking that there are $count ${match:+$match }flows with conj_id=$id..." + AT_CHECK_UNQUOTED( -+ [grep table=45 hv1flows | grep "$match" | grep -c conj_id=$id], ++ [grep table=44 hv1flows | grep "$match" | grep -c conj_id=$id], + [0], [$count +]) + done +} --# Add sw0-p3 to the port group pg0. The conj_id should be 2. 
--ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 --OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +-as hv1 ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false +AS_BOX([Add sw0-p3 to the port group pg0. The conj_id should be 2.]) +check ovn-nbctl --wait=hv pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 +wait_conj_id_count 1 "2 1" --# Add sw0p4 to the port group pg0. The conj_id should be 2. --ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 sw0-p4 --OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +-# Wait until ovn-enble-lflow-cache is processed by ovn-controller. +-OVS_WAIT_UNTIL([ +- test $(ovn-sbctl get chassis hv1 other_config:ovn-enable-lflow-cache) = '"false"' +-]) +AS_BOX([Add sw0p4 to the port group pg0. The conj_id should be 2.]) +check ovn-nbctl --wait=hv pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 sw0-p4 +wait_conj_id_count 1 "2 1" --# Add another ACL with conjunction. --ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow --OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=2")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=3")]) +AS_BOX([Add another ACL with conjunction.]) +check ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow +wait_conj_id_count 2 "2 1 tcp" "3 1 udp" --# Delete tcp ACL. 
--ovn-nbctl acl-del pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" --OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) +-# Remove port sw0-p4 from port group. +-ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 +-OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=4")]) +-AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=5")]) +AS_BOX([Delete tcp ACL.]) +check ovn-nbctl --wait=hv acl-del pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" +wait_conj_id_count 1 "3 1 udp" --# Add back the tcp ACL. --ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow --OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) +AS_BOX([Add back the tcp ACL.]) +check ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow +wait_conj_id_count 2 "3 1 udp" "4 1 tcp" - AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) - AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=4")]) - --ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && inport == @pg0 && ip4 && tcp.dst >= 84 && tcp.dst <= 86" allow --OVS_WAIT_UNTIL([test 3 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=3")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=4")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=5")]) 
++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep udp | grep -c "conj_id=3")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep tcp | grep -c "conj_id=4")]) ++ +AS_BOX([Add another tcp ACL.]) +check ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && inport == @pg0 && ip4 && tcp.dst >= 84 && tcp.dst <= 86" allow +wait_conj_id_count 3 "3 1 udp" "4 1 tcp" "5 1 tcp" - --ovn-nbctl clear port_group pg0 acls --OVS_WAIT_UNTIL([test 0 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) ++ +AS_BOX([Clear ACLs.]) +check ovn-nbctl --wait=hv clear port_group pg0 acls +wait_conj_id_count 0 - --ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow --ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow --OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep tcp | grep -c "conj_id=6")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep udp | grep -c "conj_id=7")]) ++ +AS_BOX([Add TCP ACL.]) +check ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow +check ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow +wait_conj_id_count 2 "6 1 tcp" "7 1 udp" - --# Flush the lflow cache. ++ +AS_BOX([Flush lflow cache.]) - as hv1 ovn-appctl -t ovn-controller flush-lflow-cache --OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=3")]) -- --# Disable lflow caching. 
++as hv1 ovn-appctl -t ovn-controller lflow-cache/flush +wait_conj_id_count 2 "2 1" "3 1" - ++ +AS_BOX([Disable lflow caching.]) - as hv1 ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false - --# Wait until ovn-enble-lflow-cache is processed by ovn-controller. --OVS_WAIT_UNTIL([ -- test $(ovn-sbctl get chassis hv1 other_config:ovn-enable-lflow-cache) = '"false"' --]) -- --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=2")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=3")]) ++as hv1 ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false ++ +AS_BOX([Wait until ovn-enble-lflow-cache is processed by ovn-controller.]) +wait_row_count Chassis 1 name=hv1 other_config:ovn-enable-lflow-cache=false +wait_conj_id_count 2 "2 1" "3 1" - --# Remove port sw0-p4 from port group. --ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 --OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=4")]) --AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=45 | grep -c "conj_id=5")]) ++ +AS_BOX([Remove port sw0-p4 from port group.]) +check ovn-nbctl --wait=hv pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 +wait_conj_id_count 2 "4 1" "5 1" - ++ +AS_BOX([Recompute.]) as hv1 ovn-appctl -t ovn-controller recompute @@ -18061,7 +24611,92 @@ index 2e0bc9c53..bd59c0a77 100644 OVN_CLEANUP([hv1]) -@@ -22131,6 +22659,77 @@ AT_CHECK_UNQUOTED([grep -c "output:4" offlows_table65_2.txt], [0], [dnl + AT_CLEANUP + ++AT_SETUP([ovn -- lflow cache operations]) ++ovn_start ++net_add n1 ++sim_add hv1 ++ ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++as hv1 ++ovs-vsctl -- add-port br-int hv1-vif1 \ ++ -- set interface hv1-vif1 external-ids:iface-id=lsp1 \ ++ -- add-port br-int hv1-vif2 \ ++ -- set interface hv1-vif2 external-ids:iface-id=lsp2 ++ ++ovn-nbctl ls-add ls1 
\ ++ -- lsp-add ls1 lsp1 \ ++ -- lsp-add ls1 lsp2 \ ++ -- pg-add pg1 lsp1 lsp2 \ ++ -- create Address_Set name=as1 addresses=\"10.0.0.1\",\"10.0.0.2\" ++check ovn-nbctl --wait=hv sync ++wait_for_ports_up lsp1 lsp2 ++ ++get_cache_count () { ++ local cache_name=$1 ++ as hv1 ovn-appctl -t ovn-controller lflow-cache/show-stats | grep ${cache_name} | awk '{ print $3 }' ++} ++ ++AS_BOX([Check matches caching]) ++conj_id_cnt=$(get_cache_count cache-conj-id) ++expr_cnt=$(get_cache_count cache-expr) ++matches_cnt=$(get_cache_count cache-matches) ++ ++check ovn-nbctl acl-add ls1 from-lport 1 '1' drop ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([test "$conj_id_cnt" = "$(get_cache_count cache-conj-id)"], [0], []) ++AT_CHECK([test "$expr_cnt" = "$(get_cache_count cache-expr)"], [0], []) ++AT_CHECK([test "$(($matches_cnt + 1))" = "$(get_cache_count cache-matches)"], [0], []) ++ ++AS_BOX([Check expr caching for is_chassis_resident() matches]) ++conj_id_cnt=$(get_cache_count cache-conj-id) ++expr_cnt=$(get_cache_count cache-expr) ++matches_cnt=$(get_cache_count cache-matches) ++ ++check ovn-nbctl acl-add ls1 from-lport 1 'is_chassis_resident("lsp1")' drop ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([test "$conj_id_cnt" = "$(get_cache_count cache-conj-id)"], [0], []) ++AT_CHECK([test "$(($expr_cnt + 1))" = "$(get_cache_count cache-expr)"], [0], []) ++AT_CHECK([test "$matches_cnt" = "$(get_cache_count cache-matches)"], [0], []) ++ ++AS_BOX([Check conj-id caching for conjunctive port group/address set matches]) ++conj_id_cnt=$(get_cache_count cache-conj-id) ++expr_cnt=$(get_cache_count cache-expr) ++matches_cnt=$(get_cache_count cache-matches) ++ ++check ovn-nbctl acl-add ls1 from-lport 1 'inport == @pg1 && outport == @pg1 && is_chassis_resident("lsp1")' drop ++check ovn-nbctl acl-add ls1 from-lport 1 'ip4.src == $as1 && ip4.dst == $as1 && is_chassis_resident("lsp1")' drop ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([test "$(($conj_id_cnt + 2))" = "$(get_cache_count 
cache-conj-id)"], [0], []) ++AT_CHECK([test "$expr_cnt" = "$(get_cache_count cache-expr)"], [0], []) ++AT_CHECK([test "$matches_cnt" = "$(get_cache_count cache-matches)"], [0], []) ++ ++AS_BOX([Check no caching for non-conjunctive port group/address set matches]) ++conj_id_cnt=$(get_cache_count cache-conj-id) ++expr_cnt=$(get_cache_count cache-expr) ++matches_cnt=$(get_cache_count cache-matches) ++ ++check ovn-nbctl acl-add ls1 from-lport 1 'inport == @pg2 && outport == @pg2 && is_chassis_resident("lsp1")' drop ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([test "$conj_id_cnt" = "$(get_cache_count cache-conj-id)"], [0], []) ++AT_CHECK([test "$expr_cnt" = "$(get_cache_count cache-expr)"], [0], []) ++AT_CHECK([test "$matches_cnt" = "$(get_cache_count cache-matches)"], [0], []) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ + AT_SETUP([ovn -- Delete Port_Binding and OVS port Incremental Processing]) + ovn_start + +@@ -22131,6 +23038,77 @@ AT_CHECK_UNQUOTED([grep -c "output:4" offlows_table65_2.txt], [0], [dnl OVN_CLEANUP([hv1]) AT_CLEANUP @@ -18139,7 +24774,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Test dropping traffic destined to router owned IPs. AT_SETUP([ovn -- gateway router drop traffic for own IPs]) ovn_start -@@ -22145,7 +22744,8 @@ ovn-nbctl lsp-add s1 lsp-s1-r1 -- set Logical_Switch_Port lsp-s1-r1 type=router +@@ -22145,7 +23123,8 @@ ovn-nbctl lsp-add s1 lsp-s1-r1 -- set Logical_Switch_Port lsp-s1-r1 type=router # Create logical port p1 in s1 ovn-nbctl lsp-add s1 p1 \ @@ -18149,7 +24784,7 @@ index 2e0bc9c53..bd59c0a77 100644 # Create two hypervisor and create OVS ports corresponding to logical ports. net_add n1 -@@ -22165,6 +22765,7 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ +@@ -22165,6 +23144,7 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ # for ARP resolution). 
OVN_POPULATE_ARP @@ -18157,7 +24792,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync sw_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding r1) -@@ -22208,7 +22809,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep "actions=controller" | grep +@@ -22208,7 +23188,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep "actions=controller" | grep ]) # The packet should've been dropped in the lr_in_arp_resolve stage. @@ -18166,7 +24801,7 @@ index 2e0bc9c53..bd59c0a77 100644 1 ]) -@@ -22281,6 +22882,7 @@ check test "$hvt2" -gt 0 +@@ -22281,6 +23261,7 @@ check test "$hvt2" -gt 0 # Then wait for 9 out of 10 sleep 1 check as hv3 ovn-appctl -t ovn-controller exit --restart @@ -18174,7 +24809,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=sb sync wait_row_count Chassis_Private 9 name!=hv3 nb_cfg=2 check_row_count Chassis_Private 1 name=hv3 nb_cfg=1 -@@ -22454,6 +23056,7 @@ ovn-nbctl set logical_router gw_router options:chassis=hv3 +@@ -22454,6 +23435,7 @@ ovn-nbctl set logical_router gw_router options:chassis=hv3 ovn-nbctl lr-nat-add gw_router snat 172.16.0.200 30.0.0.0/24 ovn-nbctl lr-nat-add gw_router snat 172.16.0.201 30.0.0.3 @@ -18182,7 +24817,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync # Create an interface in br-phys in hv2 and send ARP request for 172.16.0.100 -@@ -22643,6 +23246,7 @@ check ovn-nbctl acl-add ls1 to-lport 1001 \ +@@ -22643,6 +23625,7 @@ check ovn-nbctl acl-add ls1 to-lport 1001 \ check ovn-nbctl acl-add ls1 to-lport 1001 \ 'outport == "lsp1" && ip4 && ip4.src == {10.0.0.2, 10.0.0.3}' allow @@ -18190,7 +24825,7 @@ index 2e0bc9c53..bd59c0a77 100644 check ovn-nbctl --wait=hv sync sip=`ip_to_hex 10 0 0 2` -@@ -22811,6 +23415,7 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ +@@ -22811,6 +23794,7 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ options:rxq_pcap=hv1/vif1-rx.pcap \ ofport-request=1 @@ -18198,7 +24833,7 @@ index 2e0bc9c53..bd59c0a77 100644 ovn-nbctl --wait=hv sync # Expected 
conjunction flows: -@@ -22869,6 +23474,7 @@ as hv1 ovs-vsctl \ +@@ -22869,6 +23853,7 @@ as hv1 ovs-vsctl \ ovn-nbctl --wait=hv sync # hv1 ovn-controller should not bind sw0-p2. @@ -18206,7 +24841,7 @@ index 2e0bc9c53..bd59c0a77 100644 check_row_count Port_Binding 0 logical_port=sw0-p2 chassis=$c # Trigger recompute and sw0-p2 should not be claimed. -@@ -22976,93 +23582,79 @@ check ovn-nbctl lb-add lb-ipv4-udp 88.88.88.88:4040 42.42.42.1:2021 udp +@@ -22976,93 +23961,79 @@ check ovn-nbctl lb-add lb-ipv4-udp 88.88.88.88:4040 42.42.42.1:2021 udp check ovn-nbctl lb-add lb-ipv6-tcp [[8800::0088]]:8080 [[4200::1]]:4041 tcp check ovn-nbctl --wait=hv lb-add lb-ipv6-udp [[8800::0088]]:4040 [[4200::1]]:2021 udp @@ -18329,7 +24964,7 @@ index 2e0bc9c53..bd59c0a77 100644 ]) check ovn-nbctl lsp-add sw0 sw0-p2 -@@ -23070,184 +23662,159 @@ check ovn-nbctl lsp-add sw0 sw0-p2 +@@ -23070,184 +24041,159 @@ check ovn-nbctl lsp-add sw0 sw0-p2 OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xup]) OVS_WAIT_UNTIL( @@ -18601,7 +25236,7 @@ index 2e0bc9c53..bd59c0a77 100644 ]) check ovn-nbctl --wait=hv ls-lb-add sw1 lb-ipv6-udp -@@ -23255,65 +23822,115 @@ check ovn-nbctl --wait=hv ls-lb-add sw1 lb-ipv6-udp +@@ -23255,65 +24201,115 @@ check ovn-nbctl --wait=hv ls-lb-add sw1 lb-ipv6-udp # Number of hairpin flows shouldn't change as it doesn't depend on how many # datapaths the LB is applied. 
OVS_WAIT_UNTIL( @@ -18759,7 +25394,7 @@ index 2e0bc9c53..bd59c0a77 100644 as hv2 ovs-vsctl del-port hv2-vif1 OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xdown]) -@@ -23321,75 +23938,73 @@ OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xdown]) +@@ -23321,75 +24317,73 @@ OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xdown]) as hv2 ovn-appctl -t ovn-controller recompute OVS_WAIT_UNTIL( @@ -18858,7 +25493,7 @@ index 2e0bc9c53..bd59c0a77 100644 ) OVN_CLEANUP([hv1], [hv2]) -@@ -23541,3 +24156,680 @@ as ovn-nb +@@ -23541,3 +24535,1318 @@ as ovn-nb OVS_APP_EXIT_AND_WAIT([ovsdb-server]) AT_CLEANUP @@ -18938,6 +25573,14 @@ index 2e0bc9c53..bd59c0a77 100644 +wait_column "true" Port_Binding up logical_port=lsp1 +wait_column "true" nb:Logical_Switch_Port up name=lsp1 + ++AS_BOX([ovn-controller should set Port_Binding.up - to false when OVS port is released]) ++check ovs-vsctl remove Interface lsp1 external_ids iface-id ++check ovs-vsctl remove Interface lsp2 external_ids iface-id ++wait_column "false" Port_Binding up logical_port=lsp1 ++wait_column "false" Port_Binding up logical_port=lsp2 ++wait_column "false" Port_Binding up logical_port=lsp1 ++wait_column "false" nb:Logical_Switch_Port up name=lsp1 ++ +OVN_CLEANUP([hv1]) +AT_CLEANUP + @@ -19075,43 +25718,43 @@ index 2e0bc9c53..bd59c0a77 100644 +check ovn-nbctl --wait=hv sync + +# Check OVS flows are installed properly. 
-+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=44 | ofctl_strip_all | \ + grep "priority=2002" | grep conjunction | \ + sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x10/0xfff0 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x100/0xff00 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x1000/0xf000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2/0xfffe actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x20/0xffe0 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x200/0xfe00 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2000/0xe000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4/0xfffc actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x40/0xffc0 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x400/0xfc00 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4000/0xc000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8/0xfff8 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x80/0xff80 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x800/0xf800 actions=conjunction() -+ table=45, 
priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8000/0x8000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=1 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x100/0x100,reg15=0x3,metadata=0x1,nw_src=192.168.47.3 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x10/0xfff0 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x100/0xff00 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x1000/0xf000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2/0xfffe actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x20/0xffe0 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x200/0xfe00 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2000/0xe000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4/0xfffc actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x40/0xffc0 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x400/0xfc00 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4000/0xc000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8/0xfff8 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x80/0xff80 actions=conjunction() -+ table=45, 
priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x800/0xf800 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8000/0x8000 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=1 actions=conjunction() -+ table=45, priority=2002,udp,reg0=0x80/0x80,reg15=0x3,metadata=0x1,nw_src=192.168.47.3 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x10/0xfff0 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x100/0xff00 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x1000/0xf000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2/0xfffe actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x20/0xffe0 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x200/0xfe00 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2000/0xe000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4/0xfffc actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x40/0xffc0 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x400/0xfc00 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4000/0xc000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8/0xfff8 actions=conjunction() ++ table=44, 
priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x80/0xff80 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x800/0xf800 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8000/0x8000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=1 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x100/0x100,reg15=0x3,metadata=0x1,nw_src=192.168.47.3 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x10/0xfff0 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x100/0xff00 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x1000/0xf000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2/0xfffe actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x20/0xffe0 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x200/0xfe00 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2000/0xe000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4/0xfffc actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x40/0xffc0 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x400/0xfc00 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4000/0xc000 actions=conjunction() ++ table=44, 
priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8/0xfff8 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x80/0xff80 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x800/0xf800 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8000/0x8000 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=1 actions=conjunction() ++ table=44, priority=2002,udp,reg0=0x80/0x80,reg15=0x3,metadata=0x1,nw_src=192.168.47.3 actions=conjunction() +]) + +OVN_CLEANUP([hv1]) @@ -19518,23 +26161,653 @@ index 2e0bc9c53..bd59c0a77 100644 +as hv1 ovs-ofctl dump-flows br-int table=71 > hv1_offlows_table71.txt +as hv2 ovs-ofctl dump-flows br-int table=71 > hv2_offlows_table71.txt + -+AT_CAPTURE_FILE([hv1_offlows_table71.txt]) -+AT_CAPTURE_FILE([hv2_offlows_table71.txt]) -+AT_CHECK([cat hv1_offlows_table71.txt | grep -v NXST], [1], [dnl ++AT_CAPTURE_FILE([hv1_offlows_table71.txt]) ++AT_CAPTURE_FILE([hv2_offlows_table71.txt]) ++AT_CHECK([cat hv1_offlows_table71.txt | grep -v NXST], [1], [dnl ++]) ++ ++AT_CHECK([cat hv2_offlows_table71.txt | grep -v NXST], [1], [dnl ++]) ++ ++as hv1 ovs-ofctl dump-flows br-int table=72 > hv1_offlows_table72.txt ++as hv2 ovs-ofctl dump-flows br-int table=72 > hv2_offlows_table72.txt ++ ++AT_CAPTURE_FILE([hv1_offlows_table72.txt]) ++AT_CAPTURE_FILE([hv2_offlows_table72.txt]) ++AT_CHECK([cat hv1_offlows_table72.txt | grep -v NXST], [1], [dnl ++]) ++ ++AT_CHECK([cat hv2_offlows_table72.txt | grep -v NXST], [1], [dnl ++]) ++ ++OVN_CLEANUP([hv1], [hv2]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- container port changed to normal port and then deleted]) ++ovn_start ++ ++net_add n1 ++ ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ovs-vsctl -- add-port br-int vm1 ++ ++check ovn-nbctl 
ls-add ls ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl lsp-add ls vm-cont vm1 1 ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++ ++wait_for_ports_up ++ ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl clear logical_switch_port vm-cont parent_name ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=foo ++check ovn-nbctl lsp-del vm-cont ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not asserted. ++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++wait_column "false" nb:Logical_Switch_Port up name=vm1 ++ ++check ovn-nbctl lsp-add ls vm-cont1 vm1 1 ++check ovn-nbctl lsp-add ls vm-cont2 vm1 2 ++ ++check ovn-nbctl --wait=sb lsp-del vm1 ++ ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl clear logical_switch_port vm-cont1 parent_name ++check ovn-nbctl clear logical_switch_port vm-cont2 parent_name ++ ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not crashed. ++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl set logical_switch_port vm-cont1 parent_name=vm1 ++check ovn-nbctl --wait=sb set logical_switch_port vm-cont2 parent_name=vm1 ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++ ++wait_for_ports_up ++ ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl --wait=sb lsp-del vm1 ++check ovn-nbctl clear logical_switch_port vm-cont1 parent_name ++check ovn-nbctl --wait=sb clear logical_switch_port vm-cont2 parent_name ++check ovn-nbctl lsp-del vm-cont1 ++check ovn-nbctl --wait=sb lsp-del vm-cont2 ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not crashed. 
++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl lsp-add ls vm-cont1 vm1 1 ++check ovn-nbctl lsp-add ls vm-cont2 vm1 2 ++ ++wait_for_ports_up ++ ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl clear logical_switch_port vm-cont1 parent_name ++check ovn-nbctl --wait=sb clear logical_switch_port vm-cont2 parent_name ++check ovn-nbctl lsp-del vm-cont1 ++check ovn-nbctl lsp-del vm-cont2 ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not crashed. ++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++check ovn-nbctl lsp-add ls vm-cont1 vm1 1 ++check ovn-nbctl lsp-add ls vm-cont2 vm1 2 ++ ++wait_for_ports_up ++ ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl clear logical_switch_port vm-cont1 parent_name ++check ovn-nbctl --wait=sb clear logical_switch_port vm-cont2 parent_name ++ ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=foo ++ ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++wait_column "false" nb:Logical_Switch_Port up name=vm1 ++wait_column "false" nb:Logical_Switch_Port up name=vm-cont1 ++wait_column "false" nb:Logical_Switch_Port up name=vm-cont2 ++ ++check ovn-nbctl set logical_switch_port vm-cont1 parent_name=vm1 ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++check ovn-nbctl --wait=sb set logical_switch_port vm-cont2 parent_name=vm1 ++ ++wait_for_ports_up ++ ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl clear logical_switch_port vm-cont1 parent_name ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm-cont1 ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++wait_column "false" nb:Logical_Switch_Port up name=vm1 ++wait_column "true" nb:Logical_Switch_Port up name=vm-cont1 ++wait_column "false" nb:Logical_Switch_Port up name=vm-cont2 ++ ++check ovn-nbctl 
--wait=sb set logical_switch_port vm-cont2 parent_name=vm-cont1 ++check ovn-nbctl --wait=sb set logical_switch_port vm1 parent_name=vm-cont1 ++ ++wait_for_ports_up ++ ++# Delete vm1, vm-cont1 and vm-cont2 and recreate again. ++check ovn-nbctl lsp-del vm1 ++check ovn-nbctl lsp-del vm-cont1 ++check ovn-nbctl --wait=hv lsp-del vm-cont2 ++ ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl lsp-add ls vm-cont1 vm1 1 ++check ovn-nbctl lsp-add ls vm-cont2 vm1 2 ++ ++wait_for_ports_up ++ ++# Make vm1 as a child port of some non existent lport - foo. vm1, vm1-cont1 and ++# vm1-cont2 should be released. ++check ovn-nbctl --wait=sb set logical_switch_port vm1 parent_name=bar ++wait_column "false" nb:Logical_Switch_Port up name=vm1 ++wait_column "false" nb:Logical_Switch_Port up name=vm-cont1 ++wait_column "false" nb:Logical_Switch_Port up name=vm-cont2 ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- container port changed from one parent to another]) ++ovn_start ++ ++net_add n1 ++ ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ovs-vsctl -- add-port br-int vm1 -- set interface vm1 ofport-request=1 ++ovs-vsctl -- add-port br-int vm2 -- set interface vm1 ofport-request=2 ++ ++check ovn-nbctl ls-add ls ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl lsp-add ls vm1-cont vm1 1 ++check ovn-nbctl lsp-add ls vm2 ++check ovn-nbctl lsp-add ls vm2-cont vm2 2 ++ ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++check as hv1 ovs-vsctl set Interface vm2 external_ids:iface-id=vm2 ++ ++wait_for_ports_up ++ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=1], [0], [dnl ++1 ++]) ++ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=2], [0], [dnl ++1 ++]) ++ ++# change the parent of vm1-cont to vm2. 
++as hv1 ovn-appctl -t ovn-controller vlog/set dbg ++check ovn-nbctl --wait=sb set logical_switch_port vm1-cont parent_name=vm2 \ ++-- set logical_switch_port vm1-cont tag_request=3 ++ ++wait_for_ports_up ++ ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=1], [1], [dnl ++0 ++]) ++ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=2], [0], [dnl ++1 ++]) ++ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=3], [0], [dnl ++1 ++]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- container port use-after-free test]) ++ovn_start ++ ++net_add n1 ++ ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ovs-vsctl -- add-port br-int vm1 ++ ++check ovn-nbctl ls-add ls ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl lsp-add ls vm-cont vm1 1 ++check ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++check ovn-nbctl clear logical_switch_port vm-cont parent_name ++check ovs-vsctl set Interface vm1 external_ids:iface-id=foo ++check ovn-nbctl lsp-del vm-cont ++check ovn-nbctl ls-del ls ++check ovn-nbctl ls-add ls ++check ovn-nbctl lsp-add ls vm1 ++check ovn-nbctl lsp-add ls vm-cont vm1 1 ++check ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl clear logical_switch_port vm-cont parent_name ++check ovn-nbctl lsp-del vm-cont ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=foo ++ ++ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not asserted. ++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++wait_column "false" nb:Logical_Switch_Port up name=vm1 ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ ++# Test that OVS.external_ids:iface-id doesn't affect non-VIF port bindings. 
++AT_SETUP([ovn -- Non-VIF ports incremental processing]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.10 ++ ++check ovn-nbctl ls-add ls1 -- lsp-add ls1 lsp1 ++ ++as hv1 ++check ovs-vsctl \ ++ -- add-port br-int vif1 \ ++ -- set Interface vif1 external_ids:iface-id=lsp1 ++ ++# ovn-controller should bind the interface. ++wait_for_ports_up ++hv_uuid=$(fetch_column Chassis _uuid name=hv1) ++check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 ++ ++# Change the port type to router, ovn-controller should release it. ++check ovn-nbctl --wait=hv lsp-set-type lsp1 router ++check_column "" Port_Binding chassis logical_port=lsp1 ++ ++# Clear port type, ovn-controller should rebind it. ++check ovn-nbctl --wait=hv lsp-set-type lsp1 '' ++check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 ++ ++# Change the port type to localnet, ovn-controller should release it. ++check ovn-nbctl --wait=hv lsp-set-type lsp1 localnet ++check_column "" Port_Binding chassis logical_port=lsp1 ++ ++# Clear port type, ovn-controller should rebind it. ++check ovn-nbctl --wait=hv lsp-set-type lsp1 '' ++check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 ++ ++# Change the port type to localport, ovn-controller should release it. ++check ovn-nbctl --wait=hv lsp-set-type lsp1 localport ++check_column "" Port_Binding chassis logical_port=lsp1 ++ ++# Clear port type, ovn-controller should rebind it. ++check ovn-nbctl --wait=hv lsp-set-type lsp1 '' ++check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 ++ ++# Change the port type to localnet and then delete it. ++# ovn-controller should handle this properly. 
++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl --wait=sb lsp-set-type lsp1 localport ++check ovn-nbctl --wait=sb lsp-del lsp1 ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not asserted. ++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++check ovn-nbctl lsp-add ls1 lsp1 ++wait_for_ports_up ++ ++# Change the port type to virtual and then delete it. ++# ovn-controller should handle this properly. ++check as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl --wait=sb lsp-set-type lsp1 virtual ++check ovn-nbctl --wait=sb lsp-del lsp1 ++check as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++ ++# Make sure that ovn-controller has not asserted. ++AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ ++# Tests that ovn-controller creates local bindings correctly by running ++# ovn-appctl -t ovn-controller debug/dump-local-bindings. ++# Ideally this test case should have been a unit test case. 
++AT_SETUP([ovn -- ovn-controller local bindings]) ++ovn_start ++ ++net_add n1 ++ ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ovs-vsctl -- add-port br-int hv1-vm1 ++ ++sim_add hv2 ++as hv2 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.2 ++ovs-vsctl -- add-port br-int hv2-vm1 ++ ++check ovn-nbctl ls-add sw0 ++check ovn-nbctl lsp-add sw0 sw0p1 ++check ovn-nbctl lsp-add sw0 sw0p2 ++ ++check as hv1 ovs-vsctl set interface hv1-vm1 external_ids:iface-id=sw0p1 ++check as hv2 ovs-vsctl set interface hv2-vm1 external_ids:iface-id=sw0p2 ++ ++wait_for_ports_up ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p1]] ++---------------------------------------- ++]) ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- ++]) ++ ++# Create an ovs interface in hv1 ++check as hv1 ovs-vsctl add-port br-int hv1-vm2 -- set interface hv1-vm2 external_ids:iface-id=sw1p1 ++check ovn-nbctl --wait=hv sync ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p1]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[0]] ++---------------------------------------- ++]) ++ ++# Create lport sw1p1 ++check ovn-nbctl ls-add sw1 -- lsp-add sw1 sw1p1 ++ ++wait_for_ports_up ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] ++primary lport 
: [[sw0p1]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] ++primary lport : [[sw1p1]] ++---------------------------------------- ++]) ++ ++# Swap sw0p1 and sw0p2. ++check as hv1 ovs-vsctl set interface hv1-vm1 external_ids:iface-id=sw0p2 ++check as hv2 ovs-vsctl set interface hv2-vm1 external_ids:iface-id=sw0p1 ++ ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p2]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] ++primary lport : [[sw1p1]] ++---------------------------------------- ++]) ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p1]] ++---------------------------------------- ++]) ++ ++# Create child port for sw0p1 ++check ovn-nbctl --wait=hv lsp-add sw0 sw0p1-c1 sw0p1 1 ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++primary lport : [[sw0p1]] ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] ++primary lport : [[sw1p1]] ++---------------------------------------- ++]) ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv2-vm1]], num binding lports : [[2]] 
++primary lport : [[sw0p1]] ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++---------------------------------------- ++]) ++ ++# Create another child port for sw0p1 ++check ovn-nbctl --wait=hv lsp-add sw0 sw0p1-c2 sw0p1 2 ++ ++wait_for_ports_up ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[3]] ++primary lport : [[sw0p1]] ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] ++primary lport : [[sw1p1]] ++---------------------------------------- ++]) ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv2-vm1]], num binding lports : [[3]] ++primary lport : [[sw0p1]] ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++]) ++ ++# Swap sw0p1 and sw0p2 again. 
++check as hv1 ovs-vsctl set interface hv1-vm1 external_ids:iface-id=sw0p1 ++check as hv2 ovs-vsctl set interface hv2-vm1 external_ids:iface-id=sw0p2 ++ ++check ovn-nbctl --wait=hv sync ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[3]] ++primary lport : [[sw0p1]] ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] ++primary lport : [[sw1p1]] ++---------------------------------------- ++]) ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[3]] ++primary lport : [[sw0p1]] ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- ++]) ++ ++# Make sw0p1 as child port of non existent lport - foo ++check ovn-nbctl --wait=hv set logical_switch_port sw0p1 parent_name=foo ++ ++AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] 
++primary lport : [[sw1p1]] ++---------------------------------------- ++]) ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- +]) + -+AT_CHECK([cat hv2_offlows_table71.txt | grep -v NXST], [1], [dnl ++# Change the lport type of sw0p2 to different types and make sure that ++# local bindings are correct. ++ ++hv2_uuid=$(fetch_column Chassis _uuid name=hv2) ++check_column "$hv2_uuid" Port_Binding chassis logical_port=sw0p2 ++ ++# Change the port type to router, ovn-controller should release it. 
++check ovn-nbctl --wait=hv lsp-set-type sw0p2 router ++check_column "" Port_Binding chassis logical_port=sw0p2 ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] ++---------------------------------------- +]) + -+as hv1 ovs-ofctl dump-flows br-int table=72 > hv1_offlows_table72.txt -+as hv2 ovs-ofctl dump-flows br-int table=72 > hv2_offlows_table72.txt ++# change the port type to external from router. ++check ovn-nbctl --wait=hv lsp-set-type sw0p2 external ++check_column "" Port_Binding chassis logical_port=sw0p2 ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] ++---------------------------------------- ++]) + -+AT_CAPTURE_FILE([hv1_offlows_table72.txt]) -+AT_CAPTURE_FILE([hv2_offlows_table72.txt]) -+AT_CHECK([cat hv1_offlows_table72.txt | grep -v NXST], [1], [dnl ++# change the port type to localnet from external. 
++check ovn-nbctl --wait=hv lsp-set-type sw0p2 localnet ++check_column "" Port_Binding chassis logical_port=sw0p2 ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] ++---------------------------------------- +]) + -+AT_CHECK([cat hv2_offlows_table72.txt | grep -v NXST], [1], [dnl ++# change the port type to localport from localnet. ++check ovn-nbctl --wait=hv lsp-set-type sw0p2 localnet ++check_column "" Port_Binding chassis logical_port=sw0p2 ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] ++---------------------------------------- ++]) ++ ++# change the port type back to vif. 
++check ovn-nbctl --wait=hv lsp-set-type sw0p2 "" ++wait_column "$hv2_uuid" Port_Binding chassis logical_port=sw0p2 ++ ++AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl ++Local bindings: ++name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] ++no primary lport ++child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] ++no primary lport ++child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] ++child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] ++---------------------------------------- ++name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] ++primary lport : [[sw0p2]] ++---------------------------------------- +]) + +OVN_CLEANUP([hv1], [hv2]) @@ -19583,7 +26856,7 @@ index 05b17ebce..8b1b03e24 100644 dnl OVS_WAIT_WHILE(COMMAND[, IF-FAILED]) dnl diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index d59f7c97e..9819573bb 100644 +index d59f7c97e..bd27b01a0 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -1574,6 +1574,18 @@ OVS_WAIT_UNTIL([ @@ -19605,7 +26878,7 @@ index d59f7c97e..9819573bb 100644 OVS_APP_EXIT_AND_WAIT([ovn-controller]) as ovn-sb -@@ -2212,6 +2224,144 @@ tcp,orig=(src=172.16.1.2,dst=192.168.2.2,sport=,dport=),reply= +@@ -2212,6 +2224,46 @@ tcp,orig=(src=172.16.1.2,dst=192.168.2.2,sport=,dport=),reply= OVS_WAIT_UNTIL([check_est_flows], [check established flows]) @@ -19649,22 +26922,13 @@ index d59f7c97e..9819573bb 100644 + +OVS_WAIT_UNTIL([check_est_flows], [check established flows]) + -+OVS_APP_EXIT_AND_WAIT([ovn-controller]) -+ -+as ovn-sb -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+ -+as ovn-nb -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+ -+as northd -+OVS_APP_EXIT_AND_WAIT([ovn-northd]) -+ -+as -+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d -+/connection dropped.*/d"]) -+AT_CLEANUP -+ + 
OVS_APP_EXIT_AND_WAIT([ovn-controller]) + + as ovn-sb +@@ -2228,6 +2280,105 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d + /connection dropped.*/d"]) + AT_CLEANUP + +AT_SETUP([ovn -- load balancing in gateway router hairpin scenario]) +AT_KEYWORDS([ovnlb]) + @@ -19747,16 +27011,25 @@ index d59f7c97e..9819573bb 100644 + NS_CHECK_EXEC([client], [wget 172.16.1.150 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) +done + - OVS_APP_EXIT_AND_WAIT([ovn-controller]) - - as ovn-sb -@@ -2225,6 +2375,7 @@ OVS_APP_EXIT_AND_WAIT([ovn-northd]) - - as - OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d ++OVS_APP_EXIT_AND_WAIT([ovn-controller]) ++ ++as ovn-sb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as ovn-nb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as northd ++OVS_APP_EXIT_AND_WAIT([ovn-northd]) ++ ++as ++OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/Failed to acquire.*/d - /connection dropped.*/d"]) - AT_CLEANUP ++/connection dropped.*/d"]) ++AT_CLEANUP ++ + AT_SETUP([ovn -- load balancing in gateway router - IPv6]) + AT_KEYWORDS([ovnlb]) @@ -4151,7 +4302,7 @@ ovn-nbctl lsp-set-type sw1-lr0 router ovn-nbctl lsp-set-addresses sw1-lr0 router @@ -19886,7 +27159,16 @@ index d59f7c97e..9819573bb 100644 ]) OVS_APP_EXIT_AND_WAIT([ovn-controller]) -@@ -5505,3 +5682,152 @@ as +@@ -4545,7 +4722,7 @@ OVS_WAIT_UNTIL([ + ]) + + OVS_WAIT_UNTIL([ +- n_pkt=$(ovs-ofctl dump-flows br-int table=45 | grep -v n_packets=0 | \ ++ n_pkt=$(ovs-ofctl dump-flows br-int table=44 | grep -v n_packets=0 | \ + grep controller | grep tp_dst=84 -c) + test $n_pkt -eq 1 + ]) +@@ -5505,3 +5682,280 @@ as OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d /.*terminating with signal 15.*/d"]) AT_CLEANUP @@ -20039,6 +27321,134 @@ index d59f7c97e..9819573bb 100644 +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d +/.*terminating with signal 15.*/d"]) +AT_CLEANUP ++ ++AT_SETUP([ovn -- No ct_state matches in dp flows when no ACLs in an LS]) ++AT_KEYWORDS([no 
ct_state match]) ++ovn_start ++ ++OVS_TRAFFIC_VSWITCHD_START() ++ADD_BR([br-int]) ++ ++# Set external-ids in br-int needed for ovn-controller ++ovs-vsctl \ ++ -- set Open_vSwitch . external-ids:system-id=hv1 \ ++ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ ++ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ ++ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ ++ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true ++ ++# Start ovn-controller ++start_daemon ovn-controller ++ ++check ovn-nbctl ls-add sw0 ++ ++check ovn-nbctl lsp-add sw0 sw0-p1 ++check ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03" ++check ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03" ++ ++check ovn-nbctl lsp-add sw0 sw0-p2 ++check ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" ++check ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4" ++ ++ ++# Create the second logical switch with one port and configure some ACLs. ++check ovn-nbctl ls-add sw1 ++check ovn-nbctl lsp-add sw1 sw1-p1 ++ ++# Create port group and ACLs for sw1 ports. ++check ovn-nbctl pg-add pg1 sw1-p1 ++check ovn-nbctl acl-add pg1 from-lport 1002 "ip" allow-related ++check ovn-nbctl acl-add pg1 to-lport 1002 "ip" allow-related ++ ++ ++OVN_POPULATE_ARP ++ovn-nbctl --wait=hv sync ++ ++ADD_NAMESPACES(sw0-p1) ++ADD_VETH(sw0-p1, sw0-p1, br-int, "10.0.0.3/24", "50:54:00:00:00:03", \ ++ "10.0.0.1") ++ ++ ++ADD_NAMESPACES(sw0-p2) ++ADD_VETH(sw0-p2, sw0-p2, br-int, "10.0.0.4/24", "50:54:00:00:00:04", \ ++ "10.0.0.1") ++ ++ADD_NAMESPACES(sw1-p1) ++ADD_VETH(sw1-p1, sw1-p1, br-int, "20.0.0.4/24", "30:54:00:00:00:04", \ ++ "20.0.0.1") ++ ++wait_for_ports_up ++ ++NS_CHECK_EXEC([sw0-p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++ovs-appctl dpctl/dump-flows ++ ++# sw1-p1 may send IPv6 traffic. So filter this out. 
Since sw1-p1 has ++# ACLs configured, the datapath flows for the packets from sw1-p1 will have ++# matches on ct_state and ct_label fields. ++# Since sw0 doesn't have any ACLs, there should be no match on ct fields. ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep ct_state | grep -v ipv6 -c], [1], [dnl ++0 ++]) ++ ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep ct_label | grep -v ipv6 -c], [1], [dnl ++0 ++]) ++ ++# Add an ACL to sw0. ++check ovn-nbctl --wait=hv acl-add sw0 to-lport 1002 ip allow-related ++ ++NS_CHECK_EXEC([sw0-p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++ovs-appctl dpctl/dump-flows ++ ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep ct_state | grep -v ipv6 -c], [0], [ignore]) ++ ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep ct_label | grep -v ipv6 -c], [0], [ignore]) ++ ++# Clear ACL for sw0 ++check ovn-nbctl --wait=hv clear logical_switch sw0 acls ++ ++check ovs-appctl dpctl/del-flows ++ ++check ovn-nbctl --wait=hv sync ++ ++NS_CHECK_EXEC([sw0-p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++ovs-appctl dpctl/dump-flows ++ ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep ct_state | grep -v ipv6 -c], [1], [dnl ++0 ++]) ++ ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep ct_label | grep -v ipv6 -c], [1], [dnl ++0 ++]) ++ ++OVS_APP_EXIT_AND_WAIT([ovn-controller]) ++ ++as ovn-sb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as ovn-nb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as northd ++OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) ++ ++as ++OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d ++/connection dropped.*/d"]) ++AT_CLEANUP diff --git a/tests/test-ovn.c b/tests/test-ovn.c index 49a1947f6..3fbe90b32 100644 --- a/tests/test-ovn.c @@ -20052,14 +27462,102 @@ index 49a1947f6..3fbe90b32 100644 }; struct ofpbuf ofpacts; ofpbuf_init(&ofpacts, 0); +diff --git 
a/tests/test-utils.c b/tests/test-utils.c +new file mode 100644 +index 000000000..6a3b198ae +--- /dev/null ++++ b/tests/test-utils.c +@@ -0,0 +1,49 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#include ++ ++#include "test-utils.h" ++ ++#include "util.h" ++ ++bool ++test_read_uint_value(struct ovs_cmdl_context *ctx, unsigned int index, ++ const char *descr, unsigned int *result) ++{ ++ if (index >= ctx->argc) { ++ fprintf(stderr, "Missing %s argument\n", descr); ++ return false; ++ } ++ ++ const char *arg = ctx->argv[index]; ++ if (!str_to_uint(arg, 10, result)) { ++ fprintf(stderr, "Invalid %s: %s\n", descr, arg); ++ return false; ++ } ++ return true; ++} ++ ++const char * ++test_read_value(struct ovs_cmdl_context *ctx, unsigned int index, ++ const char *descr) ++{ ++ if (index >= ctx->argc) { ++ fprintf(stderr, "Missing %s argument\n", descr); ++ return NULL; ++ } ++ ++ return ctx->argv[index]; ++} +diff --git a/tests/test-utils.h b/tests/test-utils.h +new file mode 100644 +index 000000000..721032f82 +--- /dev/null ++++ b/tests/test-utils.h +@@ -0,0 +1,26 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#ifndef TEST_UTILS_H ++#define TEST_UTILS_H 1 ++ ++#include "ovstest.h" ++ ++bool test_read_uint_value(struct ovs_cmdl_context *ctx, unsigned int index, ++ const char *descr, unsigned int *result); ++const char *test_read_value(struct ovs_cmdl_context *ctx, unsigned int index, ++ const char *descr); ++ ++#endif /* tests/test-utils.h */ diff --git a/tests/testsuite.at b/tests/testsuite.at -index 960227dcc..3eba785c6 100644 +index 960227dcc..6cbf3d21a 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at -@@ -26,6 +26,7 @@ m4_include([tests/ovn.at]) +@@ -26,6 +26,8 @@ m4_include([tests/ovn.at]) m4_include([tests/ovn-performance.at]) m4_include([tests/ovn-northd.at]) m4_include([tests/ovn-nbctl.at]) ++m4_include([tests/ovn-lflow-cache.at]) +m4_include([tests/ovn-ofctrl-seqno.at]) m4_include([tests/ovn-sbctl.at]) m4_include([tests/ovn-ic-nbctl.at]) @@ -20079,10 +27577,45 @@ index 981a433be..fb96fd66b 100755 def is_subtracted_line(line): diff --git a/utilities/ovn-ctl b/utilities/ovn-ctl -index c44201ccf..211c764a6 100755 +index c44201ccf..358baa09e 100755 --- a/utilities/ovn-ctl +++ b/utilities/ovn-ctl -@@ -251,6 +251,11 @@ $cluster_remote_port +@@ -45,18 +45,12 @@ pidfile_is_running () { + test -e "$pidfile" && [ -s "$pidfile" ] && pid=`cat "$pidfile"` && pid_exists "$pid" + } >/dev/null 2>&1 + +-stop_xx_ovsdb() { +- if pidfile_is_running $1; then +- ovn-appctl -t $OVN_RUNDIR/$2 exit +- fi +-} +- + stop_nb_ovsdb() { +- stop_xx_ovsdb $DB_NB_PID ovnnb_db.ctl ++ OVS_RUNDIR=${OVS_RUNDIR} stop_ovn_daemon ovnnb_db $DB_NB_PID 
$OVN_RUNDIR/ovnnb_db.ctl + } + + stop_sb_ovsdb() { +- stop_xx_ovsdb $DB_SB_PID ovnsb_db.ctl ++ OVS_RUNDIR=${OVS_RUNDIR} stop_ovn_daemon ovnsb_db $DB_SB_PID $OVN_RUNDIR/ovnsb_db.ctl + } + + stop_ovsdb () { +@@ -65,11 +59,11 @@ stop_ovsdb () { + } + + stop_ic_nb_ovsdb() { +- stop_xx_ovsdb $DB_IC_NB_PID ovn_ic_nb_db.ctl ++ OVS_RUNDIR=${OVS_RUNDIR} stop_ovn_daemon ovn_ic_nb_db $DB_IC_NB_PID $OVN_RUNDIR/ovn_ic_nb_db.ctl + } + + stop_ic_sb_ovsdb() { +- stop_xx_ovsdb $DB_IC_SB_PID ovn_ic_sb_db.ctl ++ OVS_RUNDIR=${OVS_RUNDIR} stop_ovn_daemon ovn_ic_sb_db $DB_IC_SB_PID $OVN_RUNDIR/ovn_ic_sb_db.ctl + } + + stop_ic_ovsdb () { +@@ -251,6 +245,11 @@ $cluster_remote_port [ "$OVN_USER" != "" ] && set "$@" --user "$OVN_USER" @@ -20094,7 +27627,16 @@ index c44201ccf..211c764a6 100755 if test X"$detach" != Xno; then set "$@" --detach --monitor else -@@ -715,6 +720,8 @@ set_defaults () { +@@ -585,7 +584,7 @@ stop_ic () { + } + + stop_controller () { +- OVS_RUNDIR=${OVS_RUNDIR} stop_ovn_daemon ovn-controller "$@" ++ OVS_RUNDIR=${OVS_RUNDIR} stop_ovn_daemon ovn-controller "" "" "$@" + } + + stop_controller_vtep () { +@@ -715,6 +714,8 @@ set_defaults () { OVSDB_NB_WRAPPER= OVSDB_SB_WRAPPER= @@ -20103,7 +27645,7 @@ index c44201ccf..211c764a6 100755 OVN_USER= OVN_CONTROLLER_LOG="-vconsole:emer -vsyslog:err -vfile:info" -@@ -932,6 +939,11 @@ Options: +@@ -932,6 +933,11 @@ Options: --ovs-user="user[:group]" pass the --user flag to ovs daemons --ovsdb-nb-wrapper=WRAPPER run with a wrapper like valgrind for debugging --ovsdb-sb-wrapper=WRAPPER run with a wrapper like valgrind for debugging @@ -20115,6 +27657,84 @@ index c44201ccf..211c764a6 100755 -h, --help display this help message File location options: +diff --git a/utilities/ovn-lib.in b/utilities/ovn-lib.in +index 016815626..301cc5712 100644 +--- a/utilities/ovn-lib.in ++++ b/utilities/ovn-lib.in +@@ -137,10 +137,22 @@ start_ovn_daemon () { + } + + stop_ovn_daemon () { +- if test -e "$ovn_rundir/$1.pid"; then +- if pid=`cat 
"$ovn_rundir/$1.pid"`; then ++ local pid_file=$2 ++ local ctl_file=$3 ++ local other_args=$4 ++ ++ if [ -z "$pid_file" ]; then ++ pid_file="$ovn_rundir/$1.pid" ++ fi ++ ++ if test -e "$pid_file"; then ++ if pid=`cat "$pid_file"`; then ++ if [ -z "$ctl_file" ]; then ++ ctl_file="$ovn_rundir/$1.$pid.ctl" ++ fi ++ + if pid_exists "$pid" >/dev/null 2>&1; then :; else +- rm -f $ovn_rundir/$1.$pid.ctl $ovn_rundir/$1.$pid ++ rm -f $ctl_file $pid_file + return 0 + fi + +@@ -148,7 +160,7 @@ stop_ovn_daemon () { + actions="TERM .1 .25 .65 1 1 1 1 \ + KILL 1 1 1 2 10 15 30 \ + FAIL" +- version=`ovs-appctl -T 1 -t $ovn_rundir/$1.$pid.ctl version \ ++ version=`ovs-appctl -T 1 -t $ctl_file version \ + | awk 'NR==1{print $NF}'` + + # Use `ovs-appctl exit` only if the running daemon version +@@ -159,20 +171,36 @@ stop_ovn_daemon () { + if version_geq "$version" "2.5.90"; then + actions="$graceful $actions" + fi ++ actiontype="" + for action in $actions; do + if pid_exists "$pid" >/dev/null 2>&1; then :; else +- return 0 ++ # pid does not exist. ++ if [ -n "$actiontype" ]; then ++ return 0 ++ fi ++ # But, does the file exist? We may have had a daemon ++ # segfault with `ovs-appctl exit`. Check one more time ++ # before deciding that the daemon is dead. ++ [ -e "$pid_file" ] && sleep 2 && pid=`cat "$pid_file"` 2>/dev/null ++ if pid_exists "$pid" >/dev/null 2>&1; then :; else ++ return 0 ++ fi + fi + case $action in + EXIT) + action "Exiting $1 ($pid)" \ +- ${bindir}/ovs-appctl -T 1 -t $ovn_rundir/$1.$pid.ctl exit $2 ++ ${bindir}/ovs-appctl -T 1 -t $ctl_file exit $other_args ++ # The above command could have resulted in delayed ++ # daemon segfault. And if a monitor is running, it ++ # would restart the daemon giving it a new pid. 
+ ;; + TERM) + action "Killing $1 ($pid)" kill $pid ++ actiontype="force" + ;; + KILL) + action "Killing $1 ($pid) with SIGKILL" kill -9 $pid ++ actiontype="force" + ;; + FAIL) + log_failure_msg "Killing $1 ($pid) failed" diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml index 59302296b..2cab592ce 100644 --- a/utilities/ovn-nbctl.8.xml @@ -20204,10 +27824,22 @@ index 59302296b..2cab592ce 100644 The following example adds a load balancer.

    diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c -index d19e1b6c6..dc0c50854 100644 +index d19e1b6c6..ec373d094 100644 --- a/utilities/ovn-nbctl.c +++ b/utilities/ovn-nbctl.c -@@ -125,6 +125,65 @@ static char * OVS_WARN_UNUSED_RESULT main_loop(const char *args, +@@ -29,9 +29,11 @@ + #include "lib/acl-log.h" + #include "lib/ovn-nb-idl.h" + #include "lib/ovn-util.h" ++#include "memory.h" + #include "packets.h" + #include "openvswitch/poll-loop.h" + #include "process.h" ++#include "simap.h" + #include "smap.h" + #include "sset.h" + #include "stream.h" +@@ -125,6 +127,65 @@ static char * OVS_WARN_UNUSED_RESULT main_loop(const char *args, const struct timer *); static void server_loop(struct ovsdb_idl *idl, int argc, char *argv[]); @@ -20273,7 +27905,7 @@ index d19e1b6c6..dc0c50854 100644 int main(int argc, char *argv[]) { -@@ -707,7 +766,7 @@ Route commands:\n\ +@@ -707,7 +768,7 @@ Route commands:\n\ lr-route-list ROUTER print routes for ROUTER\n\ \n\ Policy commands:\n\ @@ -20282,7 +27914,7 @@ index d19e1b6c6..dc0c50854 100644 [OPTIONS KEY=VALUE ...] \n\ add a policy to router\n\ lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\ -@@ -1249,6 +1308,7 @@ static void +@@ -1249,6 +1310,7 @@ static void nbctl_ls_del(struct ctl_context *ctx) { bool must_exist = !shash_find(&ctx->options, "--if-exists"); @@ -20290,7 +27922,7 @@ index d19e1b6c6..dc0c50854 100644 const char *id = ctx->argv[1]; const struct nbrec_logical_switch *ls = NULL; -@@ -1261,6 +1321,11 @@ nbctl_ls_del(struct ctl_context *ctx) +@@ -1261,6 +1323,11 @@ nbctl_ls_del(struct ctl_context *ctx) return; } @@ -20302,7 +27934,7 @@ index d19e1b6c6..dc0c50854 100644 nbrec_logical_switch_delete(ls); } -@@ -1317,22 +1382,19 @@ lsp_by_name_or_uuid(struct ctl_context *ctx, const char *id, +@@ -1317,22 +1384,19 @@ lsp_by_name_or_uuid(struct ctl_context *ctx, const char *id, /* Returns the logical switch that contains 'lsp'. 
*/ static char * OVS_WARN_UNUSED_RESULT @@ -20331,7 +27963,7 @@ index d19e1b6c6..dc0c50854 100644 /* Can't happen because of the database schema */ return xasprintf("logical port %s is not part of any logical switch", lsp->name); -@@ -1353,6 +1415,7 @@ static void +@@ -1353,6 +1417,7 @@ static void nbctl_lsp_add(struct ctl_context *ctx) { bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; @@ -20339,7 +27971,7 @@ index d19e1b6c6..dc0c50854 100644 const struct nbrec_logical_switch *ls = NULL; char *error = ls_by_name_or_uuid(ctx, ctx->argv[1], true, &ls); -@@ -1395,7 +1458,7 @@ nbctl_lsp_add(struct ctl_context *ctx) +@@ -1395,7 +1460,7 @@ nbctl_lsp_add(struct ctl_context *ctx) } const struct nbrec_logical_switch *lsw; @@ -20348,7 +27980,7 @@ index d19e1b6c6..dc0c50854 100644 if (error) { ctx->error = error; return; -@@ -1448,31 +1511,27 @@ nbctl_lsp_add(struct ctl_context *ctx) +@@ -1448,31 +1513,27 @@ nbctl_lsp_add(struct ctl_context *ctx) } /* Insert the logical port into the logical switch. */ @@ -20393,7 +28025,7 @@ index d19e1b6c6..dc0c50854 100644 /* Delete 'lsp' from the IDL. This won't have a real effect on the * database server (the IDL will suppress it in fact) but it means that it -@@ -1498,18 +1557,13 @@ nbctl_lsp_del(struct ctl_context *ctx) +@@ -1498,18 +1559,13 @@ nbctl_lsp_del(struct ctl_context *ctx) /* Find the switch that contains 'lsp', then delete it. */ const struct nbrec_logical_switch *ls; @@ -20418,7 +28050,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -1658,7 +1712,7 @@ nbctl_lsp_set_addresses(struct ctl_context *ctx) +@@ -1658,7 +1714,7 @@ nbctl_lsp_set_addresses(struct ctl_context *ctx) } const struct nbrec_logical_switch *ls; @@ -20427,7 +28059,7 @@ index d19e1b6c6..dc0c50854 100644 if (error) { ctx->error = error; return; -@@ -2299,17 +2353,11 @@ nbctl_acl_add(struct ctl_context *ctx) +@@ -2299,17 +2355,11 @@ nbctl_acl_add(struct ctl_context *ctx) } /* Insert the acl into the logical switch/port group. 
*/ @@ -20447,7 +28079,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -2349,23 +2397,15 @@ nbctl_acl_del(struct ctl_context *ctx) +@@ -2349,23 +2399,15 @@ nbctl_acl_del(struct ctl_context *ctx) /* If priority and match are not specified, delete all ACLs with the * specified direction. */ if (ctx->argc == 3) { @@ -20477,7 +28109,7 @@ index d19e1b6c6..dc0c50854 100644 return; } -@@ -2387,19 +2427,11 @@ nbctl_acl_del(struct ctl_context *ctx) +@@ -2387,19 +2429,11 @@ nbctl_acl_del(struct ctl_context *ctx) if (priority == acl->priority && !strcmp(ctx->argv[4], acl->match) && !strcmp(direction, acl->direction)) { @@ -20499,7 +28131,7 @@ index d19e1b6c6..dc0c50854 100644 return; } } -@@ -2552,15 +2584,7 @@ nbctl_qos_add(struct ctl_context *ctx) +@@ -2552,15 +2586,7 @@ nbctl_qos_add(struct ctl_context *ctx) } /* Insert the qos rule the logical switch. */ @@ -20516,7 +28148,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -2597,34 +2621,31 @@ nbctl_qos_del(struct ctl_context *ctx) +@@ -2597,34 +2623,31 @@ nbctl_qos_del(struct ctl_context *ctx) /* If uuid was specified, delete qos_rule with the * specified uuid. 
*/ if (ctx->argc == 3) { @@ -20563,7 +28195,7 @@ index d19e1b6c6..dc0c50854 100644 return; } -@@ -2651,14 +2672,7 @@ nbctl_qos_del(struct ctl_context *ctx) +@@ -2651,14 +2674,7 @@ nbctl_qos_del(struct ctl_context *ctx) if (priority == qos->priority && !strcmp(ctx->argv[4], qos->match) && !strcmp(direction, qos->direction)) { @@ -20579,7 +28211,7 @@ index d19e1b6c6..dc0c50854 100644 return; } } -@@ -2821,6 +2835,14 @@ nbctl_lb_add(struct ctl_context *ctx) +@@ -2821,6 +2837,14 @@ nbctl_lb_add(struct ctl_context *ctx) bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; bool add_duplicate = shash_find(&ctx->options, "--add-duplicate") != NULL; @@ -20594,7 +28226,7 @@ index d19e1b6c6..dc0c50854 100644 const char *lb_proto; bool is_update_proto = false; -@@ -2934,6 +2956,14 @@ nbctl_lb_add(struct ctl_context *ctx) +@@ -2934,6 +2958,14 @@ nbctl_lb_add(struct ctl_context *ctx) smap_add(CONST_CAST(struct smap *, &lb->vips), lb_vip_normalized, ds_cstr(&lb_ips_new)); nbrec_load_balancer_set_vips(lb, &lb->vips); @@ -20609,7 +28241,7 @@ index d19e1b6c6..dc0c50854 100644 out: ds_destroy(&lb_ips_new); -@@ -3115,17 +3145,7 @@ nbctl_lr_lb_add(struct ctl_context *ctx) +@@ -3115,17 +3147,7 @@ nbctl_lr_lb_add(struct ctl_context *ctx) } /* Insert the load balancer into the logical router. */ @@ -20628,7 +28260,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -3158,15 +3178,7 @@ nbctl_lr_lb_del(struct ctl_context *ctx) +@@ -3158,15 +3180,7 @@ nbctl_lr_lb_del(struct ctl_context *ctx) if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { /* Remove the matching rule. */ @@ -20645,7 +28277,7 @@ index d19e1b6c6..dc0c50854 100644 return; } } -@@ -3240,17 +3252,7 @@ nbctl_ls_lb_add(struct ctl_context *ctx) +@@ -3240,17 +3254,7 @@ nbctl_ls_lb_add(struct ctl_context *ctx) } /* Insert the load balancer into the logical switch. 
*/ @@ -20664,7 +28296,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -3283,15 +3285,7 @@ nbctl_ls_lb_del(struct ctl_context *ctx) +@@ -3283,15 +3287,7 @@ nbctl_ls_lb_del(struct ctl_context *ctx) if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { /* Remove the matching rule. */ @@ -20681,7 +28313,7 @@ index d19e1b6c6..dc0c50854 100644 return; } } -@@ -3378,6 +3372,7 @@ static void +@@ -3378,6 +3374,7 @@ static void nbctl_lr_del(struct ctl_context *ctx) { bool must_exist = !shash_find(&ctx->options, "--if-exists"); @@ -20689,7 +28321,7 @@ index d19e1b6c6..dc0c50854 100644 const char *id = ctx->argv[1]; const struct nbrec_logical_router *lr = NULL; -@@ -3390,6 +3385,11 @@ nbctl_lr_del(struct ctl_context *ctx) +@@ -3390,6 +3387,11 @@ nbctl_lr_del(struct ctl_context *ctx) return; } @@ -20701,7 +28333,7 @@ index d19e1b6c6..dc0c50854 100644 nbrec_logical_router_delete(lr); } -@@ -3645,7 +3645,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) +@@ -3645,7 +3647,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) return; } const char *action = ctx->argv[4]; @@ -20711,7 +28343,7 @@ index d19e1b6c6..dc0c50854 100644 bool reroute = false; /* Validate action. */ -@@ -3665,7 +3666,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) +@@ -3665,7 +3668,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) /* Check if same routing policy already exists. 
* A policy is uniquely identified by priority and match */ bool may_exist = !!shash_find(&ctx->options, "--may-exist"); @@ -20721,7 +28353,7 @@ index d19e1b6c6..dc0c50854 100644 const struct nbrec_logical_router_policy *policy = lr->policies[i]; if (policy->priority == priority && !strcmp(policy->match, ctx->argv[3])) { -@@ -3676,12 +3678,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx) +@@ -3676,12 +3680,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx) return; } } @@ -20779,7 +28411,7 @@ index d19e1b6c6..dc0c50854 100644 } struct nbrec_logical_router_policy *policy; -@@ -3690,12 +3733,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx) +@@ -3690,12 +3735,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx) nbrec_logical_router_policy_set_match(policy, ctx->argv[3]); nbrec_logical_router_policy_set_action(policy, action); if (reroute) { @@ -20795,7 +28427,7 @@ index d19e1b6c6..dc0c50854 100644 char *key, *value; value = xstrdup(ctx->argv[i]); key = strsep(&value, "="); -@@ -3705,7 +3749,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx) +@@ -3705,7 +3751,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx) ctl_error(ctx, "No value specified for the option : %s", key); smap_destroy(&options); free(key); @@ -20807,7 +28439,7 @@ index d19e1b6c6..dc0c50854 100644 return; } free(key); -@@ -3713,18 +3760,12 @@ nbctl_lr_policy_add(struct ctl_context *ctx) +@@ -3713,18 +3762,12 @@ nbctl_lr_policy_add(struct ctl_context *ctx) nbrec_logical_router_policy_set_options(policy, &options); smap_destroy(&options); @@ -20831,7 +28463,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -3758,38 +3799,34 @@ nbctl_lr_policy_del(struct ctl_context *ctx) +@@ -3758,38 +3801,34 @@ nbctl_lr_policy_del(struct ctl_context *ctx) /* If uuid was specified, delete routing policy with the * specified uuid. 
*/ if (ctx->argc == 3) { @@ -20884,7 +28516,7 @@ index d19e1b6c6..dc0c50854 100644 return; } -@@ -3798,14 +3835,7 @@ nbctl_lr_policy_del(struct ctl_context *ctx) +@@ -3798,14 +3837,7 @@ nbctl_lr_policy_del(struct ctl_context *ctx) struct nbrec_logical_router_policy *routing_policy = lr->policies[i]; if (priority == routing_policy->priority && !strcmp(ctx->argv[3], routing_policy->match)) { @@ -20900,7 +28532,28 @@ index d19e1b6c6..dc0c50854 100644 return; } } -@@ -3884,6 +3914,47 @@ nbctl_lr_policy_list(struct ctl_context *ctx) +@@ -3833,11 +3865,15 @@ static void + print_routing_policy(const struct nbrec_logical_router_policy *policy, + struct ds *s) + { +- if (policy->nexthop != NULL) { +- char *next_hop = normalize_prefix_str(policy->nexthop); +- ds_put_format(s, "%10"PRId64" %50s %15s %25s", policy->priority, +- policy->match, policy->action, next_hop); +- free(next_hop); ++ if (policy->n_nexthops) { ++ ds_put_format(s, "%10"PRId64" %50s %15s", policy->priority, ++ policy->match, policy->action); ++ for (int i = 0; i < policy->n_nexthops; i++) { ++ char *next_hop = normalize_prefix_str(policy->nexthops[i]); ++ char *fmt = i ? 
", %s" : " %25s"; ++ ds_put_format(s, fmt, next_hop); ++ free(next_hop); ++ } + } else { + ds_put_format(s, "%10"PRId64" %50s %15s", policy->priority, + policy->match, policy->action); +@@ -3884,6 +3920,47 @@ nbctl_lr_policy_list(struct ctl_context *ctx) } free(policies); } @@ -20948,7 +28601,7 @@ index d19e1b6c6..dc0c50854 100644 static void nbctl_lr_route_add(struct ctl_context *ctx) -@@ -3927,44 +3998,42 @@ nbctl_lr_route_add(struct ctl_context *ctx) +@@ -3927,44 +4004,42 @@ nbctl_lr_route_add(struct ctl_context *ctx) goto cleanup; } @@ -21019,7 +28672,7 @@ index d19e1b6c6..dc0c50854 100644 goto cleanup; } -@@ -3981,12 +4050,25 @@ nbctl_lr_route_add(struct ctl_context *ctx) +@@ -3981,12 +4056,27 @@ nbctl_lr_route_add(struct ctl_context *ctx) if (policy) { nbrec_logical_router_static_route_set_policy(route, policy); } @@ -21039,7 +28692,9 @@ index d19e1b6c6..dc0c50854 100644 goto cleanup; } + } else if (route) { -+ ctl_error(ctx, "duplicate nexthop for the same ECMP route"); ++ if (!may_exist) { ++ ctl_error(ctx, "duplicate nexthop for the same ECMP route"); ++ } + goto cleanup; } @@ -21047,7 +28702,7 @@ index d19e1b6c6..dc0c50854 100644 route = nbrec_logical_router_static_route_insert(ctx->txn); nbrec_logical_router_static_route_set_ip_prefix(route, prefix); nbrec_logical_router_static_route_set_nexthop(route, next_hop); -@@ -4004,15 +4086,19 @@ nbctl_lr_route_add(struct ctl_context *ctx) +@@ -4004,15 +4094,19 @@ nbctl_lr_route_add(struct ctl_context *ctx) nbrec_logical_router_static_route_set_options(route, &options); } @@ -21076,7 +28731,7 @@ index d19e1b6c6..dc0c50854 100644 cleanup: free(next_hop); -@@ -4069,11 +4155,8 @@ nbctl_lr_route_del(struct ctl_context *ctx) +@@ -4069,11 +4163,8 @@ nbctl_lr_route_del(struct ctl_context *ctx) output_port = ctx->argv[4]; } @@ -21090,7 +28745,7 @@ index d19e1b6c6..dc0c50854 100644 /* Compare route policy, if specified. 
*/ if (policy) { char *nb_policy = lr->static_routes[i]->policy; -@@ -4082,7 +4165,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) +@@ -4082,7 +4173,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) nb_is_src_route = true; } if (is_src_route != nb_is_src_route) { @@ -21098,7 +28753,7 @@ index d19e1b6c6..dc0c50854 100644 continue; } } -@@ -4093,14 +4175,12 @@ nbctl_lr_route_del(struct ctl_context *ctx) +@@ -4093,14 +4183,12 @@ nbctl_lr_route_del(struct ctl_context *ctx) normalize_prefix_str(lr->static_routes[i]->ip_prefix); if (!rt_prefix) { /* Ignore existing prefix we couldn't parse. */ @@ -21113,7 +28768,7 @@ index d19e1b6c6..dc0c50854 100644 continue; } } -@@ -4111,13 +4191,11 @@ nbctl_lr_route_del(struct ctl_context *ctx) +@@ -4111,13 +4199,11 @@ nbctl_lr_route_del(struct ctl_context *ctx) normalize_prefix_str(lr->static_routes[i]->nexthop); if (!rt_nexthop) { /* Ignore existing nexthop we couldn't parse. */ @@ -21127,7 +28782,7 @@ index d19e1b6c6..dc0c50854 100644 continue; } } -@@ -4126,18 +4204,17 @@ nbctl_lr_route_del(struct ctl_context *ctx) +@@ -4126,18 +4212,17 @@ nbctl_lr_route_del(struct ctl_context *ctx) if (output_port) { char *rt_output_port = lr->static_routes[i]->output_port; if (!rt_output_port || strcmp(output_port, rt_output_port)) { @@ -21152,7 +28807,7 @@ index d19e1b6c6..dc0c50854 100644 ctl_error(ctx, "no matching route: policy '%s', prefix '%s', nexthop " "'%s', output_port '%s'.", policy ? policy : "any", -@@ -4146,8 +4223,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) +@@ -4146,8 +4231,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) output_port ? output_port : "any"); } @@ -21161,7 +28816,7 @@ index d19e1b6c6..dc0c50854 100644 free(prefix); free(nexthop); } -@@ -4418,12 +4493,7 @@ nbctl_lr_nat_add(struct ctl_context *ctx) +@@ -4418,12 +4501,7 @@ nbctl_lr_nat_add(struct ctl_context *ctx) smap_destroy(&nat_options); /* Insert the NAT into the logical router. 
*/ @@ -21175,7 +28830,7 @@ index d19e1b6c6..dc0c50854 100644 cleanup: free(new_logical_ip); -@@ -4459,17 +4529,11 @@ nbctl_lr_nat_del(struct ctl_context *ctx) +@@ -4459,17 +4537,11 @@ nbctl_lr_nat_del(struct ctl_context *ctx) if (ctx->argc == 3) { /*Deletes all NATs with the specified type. */ @@ -21195,7 +28850,7 @@ index d19e1b6c6..dc0c50854 100644 return; } -@@ -4491,13 +4555,7 @@ nbctl_lr_nat_del(struct ctl_context *ctx) +@@ -4491,13 +4563,7 @@ nbctl_lr_nat_del(struct ctl_context *ctx) continue; } if (!strcmp(nat_type, nat->type) && !strcmp(nat_ip, old_ip)) { @@ -21210,7 +28865,7 @@ index d19e1b6c6..dc0c50854 100644 should_return = true; } free(old_ip); -@@ -4667,20 +4725,18 @@ lrp_by_name_or_uuid(struct ctl_context *ctx, const char *id, bool must_exist, +@@ -4667,20 +4733,18 @@ lrp_by_name_or_uuid(struct ctl_context *ctx, const char *id, bool must_exist, /* Returns the logical router that contains 'lrp'. */ static char * OVS_WARN_UNUSED_RESULT @@ -21237,7 +28892,7 @@ index d19e1b6c6..dc0c50854 100644 } /* Can't happen because of the database schema */ -@@ -4777,15 +4833,7 @@ nbctl_lrp_set_gateway_chassis(struct ctl_context *ctx) +@@ -4777,15 +4841,7 @@ nbctl_lrp_set_gateway_chassis(struct ctl_context *ctx) nbrec_gateway_chassis_set_priority(gc, priority); /* Insert the logical gateway chassis into the logical router port. */ @@ -21254,7 +28909,7 @@ index d19e1b6c6..dc0c50854 100644 free(gc_name); } -@@ -4802,14 +4850,7 @@ remove_gc(const struct nbrec_logical_router_port *lrp, size_t idx) +@@ -4802,14 +4858,7 @@ remove_gc(const struct nbrec_logical_router_port *lrp, size_t idx) * will actually cause the gateway chassis to be deleted when the * transaction is sent to the database server (due to garbage * collection). */ @@ -21270,7 +28925,7 @@ index d19e1b6c6..dc0c50854 100644 } /* Delete 'gc' from the IDL. 
This won't have a real effect on -@@ -4893,6 +4934,7 @@ static void +@@ -4893,6 +4942,7 @@ static void nbctl_lrp_add(struct ctl_context *ctx) { bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; @@ -21278,7 +28933,7 @@ index d19e1b6c6..dc0c50854 100644 const struct nbrec_logical_router *lr = NULL; char *error = lr_by_name_or_uuid(ctx, ctx->argv[1], true, &lr); -@@ -4942,7 +4984,7 @@ nbctl_lrp_add(struct ctl_context *ctx) +@@ -4942,7 +4992,7 @@ nbctl_lrp_add(struct ctl_context *ctx) } const struct nbrec_logical_router *bound_lr; @@ -21287,7 +28942,7 @@ index d19e1b6c6..dc0c50854 100644 if (error) { ctx->error = error; return; -@@ -5040,31 +5082,27 @@ nbctl_lrp_add(struct ctl_context *ctx) +@@ -5040,31 +5090,27 @@ nbctl_lrp_add(struct ctl_context *ctx) } /* Insert the logical port into the logical router. */ @@ -21332,7 +28987,7 @@ index d19e1b6c6..dc0c50854 100644 /* Delete 'lrp' from the IDL. This won't have a real effect on * the database server (the IDL will suppress it in fact) but it -@@ -5090,18 +5128,13 @@ nbctl_lrp_del(struct ctl_context *ctx) +@@ -5090,18 +5136,13 @@ nbctl_lrp_del(struct ctl_context *ctx) /* Find the router that contains 'lrp', then delete it. */ const struct nbrec_logical_router *lr; @@ -21357,7 +29012,7 @@ index d19e1b6c6..dc0c50854 100644 } /* Print a list of logical router ports. 
*/ -@@ -5275,7 +5308,7 @@ fwd_group_to_logical_switch(struct ctl_context *ctx, +@@ -5275,7 +5316,7 @@ fwd_group_to_logical_switch(struct ctl_context *ctx, } const struct nbrec_logical_switch *ls; @@ -21366,7 +29021,7 @@ index d19e1b6c6..dc0c50854 100644 if (error) { ctx->error = error; return NULL; -@@ -5350,7 +5383,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) +@@ -5350,7 +5391,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) return; } if (lsp) { @@ -21375,7 +29030,7 @@ index d19e1b6c6..dc0c50854 100644 if (error) { ctx->error = error; return; -@@ -5373,15 +5406,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) +@@ -5373,15 +5414,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) nbrec_forwarding_group_set_liveness(fwd_group, true); } @@ -21392,7 +29047,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -5403,14 +5428,8 @@ nbctl_fwd_group_del(struct ctl_context *ctx) +@@ -5403,14 +5436,8 @@ nbctl_fwd_group_del(struct ctl_context *ctx) for (int i = 0; i < ls->n_forwarding_groups; ++i) { if (!strcmp(ls->forwarding_groups[i]->name, fwd_group->name)) { @@ -21409,7 +29064,7 @@ index d19e1b6c6..dc0c50854 100644 nbrec_forwarding_group_delete(fwd_group); return; } -@@ -5498,17 +5517,27 @@ struct ipv4_route { +@@ -5498,17 +5525,27 @@ struct ipv4_route { const struct nbrec_logical_router_static_route *route; }; @@ -21442,7 +29097,7 @@ index d19e1b6c6..dc0c50854 100644 } return route_cmp_details(route1p->route, route2p->route); } -@@ -5519,16 +5548,22 @@ struct ipv6_route { +@@ -5519,16 +5556,22 @@ struct ipv6_route { const struct nbrec_logical_router_static_route *route; }; @@ -21469,7 +29124,7 @@ index d19e1b6c6..dc0c50854 100644 if (ret) { return ret; } -@@ -5536,7 +5571,8 @@ ipv6_route_cmp(const void *route1_, const void *route2_) +@@ -5536,7 +5579,8 @@ ipv6_route_cmp(const void *route1_, const void *route2_) } static void @@ -21479,7 +29134,7 @@ index d19e1b6c6..dc0c50854 100644 { char *prefix = normalize_prefix_str(route->ip_prefix); -@@ -5558,6 +5594,19 @@ 
print_route(const struct nbrec_logical_router_static_route *route, struct ds *s) +@@ -5558,6 +5602,19 @@ print_route(const struct nbrec_logical_router_static_route *route, struct ds *s) if (smap_get(&route->external_ids, "ic-learned-route")) { ds_put_format(s, " (learned)"); } @@ -21499,7 +29154,7 @@ index d19e1b6c6..dc0c50854 100644 ds_put_char(s, '\n'); } -@@ -5623,7 +5672,16 @@ nbctl_lr_route_list(struct ctl_context *ctx) +@@ -5623,7 +5680,16 @@ nbctl_lr_route_list(struct ctl_context *ctx) ds_put_cstr(&ctx->output, "IPv4 Routes\n"); } for (int i = 0; i < n_ipv4_routes; i++) { @@ -21517,7 +29172,7 @@ index d19e1b6c6..dc0c50854 100644 } if (n_ipv6_routes) { -@@ -5631,7 +5689,16 @@ nbctl_lr_route_list(struct ctl_context *ctx) +@@ -5631,7 +5697,16 @@ nbctl_lr_route_list(struct ctl_context *ctx) n_ipv4_routes ? "\n" : ""); } for (int i = 0; i < n_ipv6_routes; i++) { @@ -21535,7 +29190,7 @@ index d19e1b6c6..dc0c50854 100644 } free(ipv4_routes); -@@ -6007,17 +6074,7 @@ cmd_ha_ch_grp_add_chassis(struct ctl_context *ctx) +@@ -6007,17 +6082,7 @@ cmd_ha_ch_grp_add_chassis(struct ctl_context *ctx) nbrec_ha_chassis_set_chassis_name(ha_chassis, chassis_name); nbrec_ha_chassis_set_priority(ha_chassis, priority); @@ -21554,7 +29209,7 @@ index d19e1b6c6..dc0c50854 100644 } static void -@@ -6032,11 +6089,9 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) +@@ -6032,11 +6097,9 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) const char *chassis_name = ctx->argv[2]; struct nbrec_ha_chassis *ha_chassis = NULL; @@ -21566,7 +29221,7 @@ index d19e1b6c6..dc0c50854 100644 break; } } -@@ -6047,14 +6102,7 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) +@@ -6047,14 +6110,7 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) return; } @@ -21582,7 +29237,7 @@ index d19e1b6c6..dc0c50854 100644 nbrec_ha_chassis_delete(ha_chassis); } -@@ -6231,7 +6279,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, +@@ -6231,7 +6287,7 @@ 
do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, struct ovsdb_idl_txn *txn; enum ovsdb_idl_txn_status status; struct ovsdb_symbol_table *symtab; @@ -21591,7 +29246,7 @@ index d19e1b6c6..dc0c50854 100644 struct ctl_command *c; struct shash_node *node; int64_t next_cfg = 0; -@@ -6268,25 +6316,26 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, +@@ -6268,25 +6324,26 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, ds_init(&c->output); c->table = NULL; } @@ -21628,7 +29283,7 @@ index d19e1b6c6..dc0c50854 100644 SHASH_FOR_EACH (node, &symtab->sh) { struct ovsdb_symbol *symbol = node->data; -@@ -6317,14 +6366,14 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, +@@ -6317,14 +6374,14 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, if (status == TXN_UNCHANGED || status == TXN_SUCCESS) { for (c = commands; c < &commands[n_commands]; c++) { if (c->syntax->postprocess) { @@ -21649,7 +29304,7 @@ index d19e1b6c6..dc0c50854 100644 } } } -@@ -6412,6 +6461,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, +@@ -6412,6 +6469,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, done: ; } @@ -21657,7 +29312,7 @@ index d19e1b6c6..dc0c50854 100644 ovsdb_symbol_table_destroy(symtab); ovsdb_idl_txn_destroy(txn); the_idl_txn = NULL; -@@ -6429,6 +6479,7 @@ out_error: +@@ -6429,6 +6487,7 @@ out_error: ovsdb_idl_txn_destroy(txn); the_idl_txn = NULL; @@ -21665,7 +29320,7 @@ index d19e1b6c6..dc0c50854 100644 ovsdb_symbol_table_destroy(symtab); return error; } -@@ -6561,7 +6612,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { +@@ -6561,7 +6620,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { /* logical router route commands. 
*/ { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", NULL, nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply," @@ -21674,7 +29329,7 @@ index d19e1b6c6..dc0c50854 100644 { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", NULL, nbctl_lr_route_del, NULL, "--if-exists,--policy=", RW }, { "lr-route-list", 1, 1, "ROUTER", NULL, nbctl_lr_route_list, NULL, -@@ -6588,7 +6639,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { +@@ -6588,7 +6647,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { nbctl_lr_nat_set_ext_ips, NULL, "--is-exempted", RW}, /* load balancer commands. */ { "lb-add", 3, 4, "LB VIP[:PORT] IP[:PORT]... [PROTOCOL]", NULL, @@ -21683,6 +29338,30 @@ index d19e1b6c6..dc0c50854 100644 { "lb-del", 1, 2, "LB [VIP]", NULL, nbctl_lb_del, NULL, "--if-exists", RW }, { "lb-list", 0, 1, "[LB]", NULL, nbctl_lb_list, NULL, "", RO }, +@@ -6897,6 +6956,15 @@ server_loop(struct ovsdb_idl *idl, int argc, char *argv[]) + server_cmd_init(idl, &exiting); + + for (;;) { ++ memory_run(); ++ if (memory_should_report()) { ++ struct simap usage = SIMAP_INITIALIZER(&usage); ++ ++ /* Nothing special to report yet. 
*/ ++ memory_report(&usage); ++ simap_destroy(&usage); ++ } ++ + ovsdb_idl_run(idl); + if (!ovsdb_idl_is_alive(idl)) { + int retval = ovsdb_idl_get_last_error(idl); +@@ -6912,6 +6980,7 @@ server_loop(struct ovsdb_idl *idl, int argc, char *argv[]) + break; + } + ++ memory_wait(); + ovsdb_idl_wait(idl); + unixctl_server_wait(server); + poll_block(); diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c index 0a1b9ffdc..c38e8ec3b 100644 --- a/utilities/ovn-sbctl.c @@ -22087,3 +29766,27 @@ index 6fad36512..fb88bc06c 100644 } } ds_destroy(&s); +diff --git a/utilities/ovndb-servers.ocf b/utilities/ovndb-servers.ocf +index 7351c7d64..eba9c97a1 100755 +--- a/utilities/ovndb-servers.ocf ++++ b/utilities/ovndb-servers.ocf +@@ -259,6 +259,9 @@ ovsdb_server_notify() { + ovn-nbctl -- --id=@conn_uuid create Connection \ + target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTEN_ON_IP}" \ + inactivity_probe=$INACTIVE_PROBE -- set NB_Global . connections=@conn_uuid ++ else ++ CONN_UID=$(sed -e 's/^\[//' -e 's/\]$//' <<< ${conn}) ++ ovn-nbctl set connection "${CONN_UID}" target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTEN_ON_IP}" + fi + + conn=`ovn-sbctl get SB_global . connections` +@@ -267,6 +270,9 @@ inactivity_probe=$INACTIVE_PROBE -- set NB_Global . connections=@conn_uuid + ovn-sbctl -- --id=@conn_uuid create Connection \ + target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTEN_ON_IP}" \ + inactivity_probe=$INACTIVE_PROBE -- set SB_Global . 
connections=@conn_uuid ++ else ++ CONN_UID=$(sed -e 's/^\[//' -e 's/\]$//' <<< ${conn}) ++ ovn-sbctl set connection "${CONN_UID}" target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTEN_ON_IP}" + fi + + else diff --git a/SPECS/ovn2.13.spec b/SPECS/ovn2.13.spec index 1b900e5..2eb1782 100644 --- a/SPECS/ovn2.13.spec +++ b/SPECS/ovn2.13.spec @@ -51,7 +51,7 @@ Summary: Open Virtual Network support Group: System Environment/Daemons URL: http://www.ovn.org/ Version: 20.12.0 -Release: 85%{?commit0:.%{date}git%{shortcommit0}}%{?dist} +Release: 135%{?commit0:.%{date}git%{shortcommit0}}%{?dist} Provides: openvswitch%{pkgver}-ovn-common = %{?epoch:%{epoch}:}%{version}-%{release} Obsoletes: openvswitch%{pkgver}-ovn-common < 2.11.0-1 @@ -62,8 +62,8 @@ License: ASL 2.0 and LGPLv2+ and SISSL # Always pull an upstream release, since this is what we rebase to. Source: https://github.com/ovn-org/ovn/archive/v%{version}.tar.gz#/ovn-%{version}.tar.gz -%define ovscommit ac09cbfcb70ac6f443f039d5934448bd80f74493 -%define ovsshortcommit ac09cbf +%define ovscommit ac85cdb38c1f33e7952bc4c0347d6c7873fb56a1 +%define ovsshortcommit ac85cdb Source10: https://github.com/openvswitch/ovs/archive/%{ovscommit}.tar.gz#/openvswitch-%{ovsshortcommit}.tar.gz %define ovsdir ovs-%{ovscommit} @@ -526,6 +526,206 @@ fi %{_unitdir}/ovn-controller-vtep.service %changelog +* Thu May 27 2021 Dumitru Ceara - 20.12.0-135 +- if-status: Add OVS interface status management module. (#1952846) + [ddfe75df4b14b512867c588572b10d35ea0b50ca] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-134 +- ovn-controller.c: Remove extra local_lports_changed setting. + [bf4c5b17298bfcb1153f895ca31ca2feefa6e4aa] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-133 +- lflow-cache: Make max cache memory usage configurable. + [7b0ccd29113d506a54823b96ef017481c98d03c2] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-132 +- Add memory reports to all OVN processes. 
+ [3524b7fc49fe022d06b940c772b758677b8ebbc3] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-131 +- lflow-cache: Make maximum number of cache entries configurable. + [38053ea1feeb1606b54803ac5d1aacc2fdd4504e] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-130 +- lflow-cache: Reclaim heap memory after cache flush. + [a3aed41abfd7afa3bbb42cc105b9b62f05717f3d] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-129 +- lflow-cache: Add coverage counters. + [258d9478fd874a9ec062b1f837a2d78c404eaaac] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-128 +- lflow: Do not cache non-conjunctive flows that use address sets/portgroups. + [3b2aa66f4aeb2b566c5200807f7894754d4c4fda] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-127 +- lflow-cache: Add unit tests. + [86703dd084b15e45307f91d96208400f9d0430ee] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-126 +- lflow-cache: Add lflow-cache/show-stats command. + [a19997f019f4e6633d67e0e312154d76726f2640] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-125 +- lflow-cache: Move the lflow cache to its own module. + [33268f1c94cef1a7d94318e1c90d9b855475b9f9] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-124 +- lflow: Refactor convert_match_to_expr() to explicitly consume prereqs. + [bef2f4cecdf131165fdc0c436bd546488d564830] + +* Thu May 27 2021 Dumitru Ceara - 20.12.0-123 +- lflow: Fix cache update when I-P engine aborts. + [ab4524752d7b86f7b710425f26419772914674da] + +* Fri May 21 2021 Lorenzo Bianconi - 20.12.0-122 +- physical: do not forward traffic from localport to a localnet one + [da4700e521479ddbb07761e01e82763a60a990fb] + +* Thu May 20 2021 Lorenzo Bianconi - 20.12.0-121 +- ovn-nbctl: do not report an error for duplicated ecmp routes with --may-exist + [b401645ad521562d7a4269f6fc3d874a57708483] + +* Wed May 19 2021 Lorenzo Bianconi - 20.12.0-120 +- controller: fix physical flow update for localport + [49f09532dfa56a85ad622dc47aad2d16ceb770aa] + +* Tue May 18 2021 Mark Michelson - 20.12.0-119 +- expr: crush the result of a sorted OR expression. 
+ [801534ac9568dce795e55601717559dbca05f4f0] + +* Fri May 14 2021 Ilya Maximets - 20.12.0-118 +- Fix compilation error introduced in the previous commit. + [274a26cb2b88ac9d80d87ae46e32501fb9c08cdd] + +* Fri May 14 2021 Ilya Maximets - 20.12.0-117 +- northd: Combine router arp flows. (#1945415) + [9baa16e04aad24f700f655612e904b7e47d1f170] + +* Thu May 13 2021 Numan Siddique - 20.12.0-116 +- ovn-controller: Ensure br-int is using secure fail-mode (#1957025) + [be765562706390a20af4986f1cc8b3349ff1a027] + +* Tue May 11 2021 Numan Siddique - 20.12.0-115 +- northd: Support flow offloading for logical switches with no ACLs. (#1955191) + [a8ee0c731ab1fa1af6bcc00203d471b16ee62cf7] + +* Mon May 10 2021 Numan Siddique - 20.12.0-114 +- northd: Provide the option to not use ct.inv in lflows. + [0a2dc0c4ac49beda1d343a60d7022072704ca371] + +* Mon May 10 2021 Numan Siddique - 20.12.0-113 +- northd: Optimize ct nat for load balancer traffic. + [fad197f1349474b68a58040e071331e0cd97ddb9] + +* Fri Apr 30 2021 Dumitru Ceara - 20.12.0-112 +- binding: Don't reset expected seqno for interfaces already being installed. (#1946420) + [9b5f742edfb9c6b0e1c55c2c44d35a1f2190b82b] + +* Mon Apr 26 2021 Dumitru Ceara - 20.12.0-111 +- tests: Improve test "IGMP snoop/querier/relay". (#1941067) + [46bd5f2391aa7222dcb03300e2e8368621c8cd81] + +* Mon Apr 26 2021 Dumitru Ceara - 20.12.0-110 +- pinctrl: Fix race condition when explicitly clearing IGMP groups. 
+ [985efa3be5a225b14e73304ca212e4e5b12f3642] + +* Thu Apr 22 2021 Lorenzo Bianconi - 20.12.0-109 +- ovn-nbctl: dump next-hop for router policies + [96b0f3c11474df6cc97b50697c2c762b81bbb77d] + +* Wed Apr 21 2021 Numan Siddique - 20.12.0-108 +- ovn-ctl: stop databases with stop_ovn_daemon() (#1944239) + [f136c580c542eda471847e2be27a71ae57a52bd7] + +* Wed Apr 21 2021 Numan Siddique - 20.12.0-107 +- ovn-lib: harmonize stop_ovn_daemon() with ovs-lib + [3ac0d56793347cf80f935afa41105e4567747356] + +* Wed Apr 21 2021 Numan Siddique - 20.12.0-106 +- tests: Fix frequent failure of "4 HV, 1 LS, 1 LR, packet test with HA distributed router gateway port:". + [adb1912ae08d15e55b621b431a0b5b1d51813d15] + +* Wed Apr 21 2021 Numan Siddique - 20.12.0-105 +- controller: Monitor all logical flows that refer to datapath groups. (#1947056) + [efb2e29b06abd765608e4b29cfd936ed34c46f19] + +* Wed Apr 14 2021 Numan Siddique - 20.12.0-104 +- controller: Fix virtual lport I-P handling. (#1947823) + [67ee7d519a98c2b4c25b122a9278214476820805] + +* Tue Apr 13 2021 Mark Michelson - 20.12.0-103 +- Re-bump the submodule to what it was previously. + [a295332936418f12a49beafdb88dfdbda71d5eb3] + +* Tue Apr 13 2021 Gerrit Code Review - 20.12.0-102 +- Merge changes from topic 'python_refactor' into ovn2.13 + [46c6f06eec15223d8df7800f5ed0c91ce1b52ac6] + +* Tue Apr 13 2021 Lorenzo Bianconi - 20.12.0-101 +- northd: introduce per-lb lb_skip_snat option + [3a4fa507bafbf139c354977801b0731edc5992f3] + +* Thu Apr 08 2021 Mark Michelson - 20.12.0-100 +- ovs: Bump submodule version to include latest ovsdb-idl fixes. + [962bb3c50df6b15c0b8055d6ce9db3cea58e1007] + +* Thu Apr 01 2021 Numan Siddique - 20.12.0-99 +- binding: Fix the crashes seen when port binding type changes. (#1936328) + [c65983b297e764d3c25ce9218caf376b3f310142] + +* Thu Apr 01 2021 Lorenzo Bianconi - 20.12.0-98 +- northd: Restore flows that recirculate packets in the router DNAT zone. 
+ [dd5151cee607f52a9d19c88110b7b3eb8b504ad2] + +* Thu Mar 25 2021 Numan Siddique - 20.12.0-97 +- Fix connection string in case of changes in the ovndb-servers.ocf RA + [25a39da8045d4a4766a13994db9c2283a0f129aa] + +* Thu Mar 25 2021 Numan Siddique - 20.12.0-96 +- pinctrl: Don't send gARPs for localports (#1939470) + [2d1f464440a1dd63949fdb17ba7ad781fdef07a3] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-95 +- northd: introduce lrouter_check_nat_entry routine + [ce03aeb06f4f27eb6ba3018ad0f89970287b24af] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-94 +- northd: introduce build_lrouter_ingress_flow routine + [cacd1b1096739977885ce05885c3ac6f1002efbe] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-93 +- northd: introduce build_lrouter_out_snat_flow routine + [d2b51b2cb542f7043cc5bf2d186ca6722b12d737] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-92 +- northd: introduce build_lrouter_out_undnat_flow routine + [d4973babbe064652eed5c27a6a82bf995a6390ff] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-91 +- northd: introduce build_lrouter_in_dnat_flow routine + [a55aa2020232ba82c84cf321027ee9eef0334a1e] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-90 +- northd: introduce build_lrouter_in_unsnat_flow routine + [6b6a872f2ccce4df7ec91d60db8e569fd33f6e65] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-89 +- northd: introduce build_lrouter_lb_flows routine + [f10a2cf001ffb015c238f8a27daf16734bba16fc] + +* Thu Mar 25 2021 Lorenzo Bianconi - 20.12.0-88 +- northd: reduce indentation in build_lrouter_nat_defrag_and_lb + [f6c083e885938b3ec5ac03a382bc07f49e3fa7ea] + +* Wed Mar 17 2021 Mark Michelson - 20.12.0-87 +- Add 'redhat' folder to distribution exceptions. 
+ [db67a2356f6d320025a5adcad5980d8dfa00d4ee] + +* Wed Mar 17 2021 Lorenzo Bianconi - 20.12.0-86 +- controller: introduce stats counters for ovn-controller incremental processing + [22b5f6a9989819e19e6ec85e70a74e8b3a09c8fb] + * Tue Mar 16 2021 Mark Michelson - 20.12.0-85 - ovs: Bump submodule version to latest ovsdb-cs changes. [a79aa8ecc00450ab9c672dbe8add9a8a231186ab]