diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh index 0bb0ff096..83ad3958b 100755 --- a/.ci/linux-prepare.sh +++ b/.ci/linux-prepare.sh @@ -12,5 +12,5 @@ set -ev git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git cd sparse && make -j4 HAVE_LLVM= HAVE_SQLITE= install && cd .. -pip install --disable-pip-version-check --user six flake8 hacking -pip install --user --upgrade docutils +pip3 install --disable-pip-version-check --user flake8 hacking sphinx pyOpenSSL +pip3 install --upgrade --user docutils diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f3a53a8b6..91bd1e538 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,6 @@ jobs: dependencies: | automake libtool gcc bc libjemalloc1 libjemalloc-dev \ libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev \ - python3-openssl python3-pip python3-sphinx \ selinux-policy-dev m32_dependecies: gcc-multilib CC: ${{ matrix.compiler }} @@ -88,11 +87,21 @@ jobs: if: matrix.m32 != '' run: sudo apt install -y ${{ env.m32_dependecies }} + - name: update PATH + run: | + echo "$HOME/bin" >> $GITHUB_PATH + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: set up python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: prepare run: ./.ci/linux-prepare.sh - name: build - run: PATH="$PATH:$HOME/bin" ./.ci/linux-build.sh + run: ./.ci/linux-build.sh - name: copy logs on failure if: failure() || cancelled() @@ -145,10 +154,18 @@ jobs: ref: 'master' - name: install dependencies run: brew install automake libtool + - name: update PATH + run: | + echo "$HOME/bin" >> $GITHUB_PATH + echo "$HOME/.local/bin" >> $GITHUB_PATH + - name: set up python + uses: actions/setup-python@v2 + with: + python-version: '3.x' - name: prepare run: ./.ci/osx-prepare.sh - name: build - run: PATH="$PATH:$HOME/bin" ./.ci/osx-build.sh + run: ./.ci/osx-build.sh - name: upload logs on failure if: failure() uses: actions/upload-artifact@v2 diff --git a/Makefile.am 
b/Makefile.am index 80247b62d..1fe730dc4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -221,6 +221,7 @@ dist-hook-git: distfiles grep -v '\.gitattributes$$' | \ grep -v '\.gitmodules$$' | \ grep -v "$(submodules)" | \ + grep -v 'redhat' | \ LC_ALL=C sort -u > all-gitfiles; \ LC_ALL=C comm -1 -3 distfiles all-gitfiles > missing-distfiles; \ if test -s missing-distfiles; then \ @@ -332,7 +333,7 @@ check-tabs: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ grep -ln "^ " \ - `git ls-files | grep -v $(submodules) \ + `git ls-files | grep -v $(submodules) | grep -v redhat \ | grep -v -f build-aux/initial-tab-whitelist` /dev/null \ | $(EGREP) -v ':[ ]*/?\*'; \ then \ diff --git a/NEWS b/NEWS index 5372668bf..530c5d42f 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,13 @@ +Post-v21.03.0 +------------------------- + - ovn-northd-ddlog: New implementation of northd, based on DDlog. This + implementation is incremental, meaning that it only recalculates what is + needed for the southbound database when northbound changes occur. It is + expected to scale better than the C implementation, for large deployments. + (This may take testing and tuning to be effective.) This version of OVN + requires DDlog 0.36. + - Introduce ovn-controller incremental processing engine statistics + OVN v21.03.0 - 12 Mar 2021 ------------------------- - Support ECMP multiple nexthops for reroute router policies. diff --git a/configure.ac b/configure.ac index 37b476d53..f3de6fef2 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. 
AC_PREREQ(2.63) -AC_INIT(ovn, 21.03.0, bugs@openvswitch.org) +AC_INIT(ovn, 21.03.1, bugs@openvswitch.org) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_HEADERS([config.h]) diff --git a/controller/binding.c b/controller/binding.c index 4e6c75696..514f5f33f 100644 --- a/controller/binding.c +++ b/controller/binding.c @@ -597,6 +597,23 @@ remove_local_lport_ids(const struct sbrec_port_binding *pb, } } +/* Corresponds to each Port_Binding.type. */ +enum en_lport_type { + LP_UNKNOWN, + LP_VIF, + LP_CONTAINER, + LP_PATCH, + LP_L3GATEWAY, + LP_LOCALNET, + LP_LOCALPORT, + LP_L2GATEWAY, + LP_VTEP, + LP_CHASSISREDIRECT, + LP_VIRTUAL, + LP_EXTERNAL, + LP_REMOTE +}; + /* Local bindings. binding.c module binds the logical port (represented by * Port_Binding rows) and sets the 'chassis' column when it sees the * OVS interface row (of type "" or "internal") with the @@ -608,134 +625,180 @@ remove_local_lport_ids(const struct sbrec_port_binding *pb, * 'struct local_binding' is used. A shash of these local bindings is * maintained with the 'external_ids:iface-id' as the key to the shash. * - * struct local_binding (defined in binding.h) has 3 main fields: - * - type - * - OVS interface row object - * - Port_Binding row object - * - * An instance of 'struct local_binding' can be one of 3 types. - * - * BT_VIF: Represent a local binding for an OVS interface of - * type "" or "internal" with the external_ids:iface-id - * set. - * - * This can be a - * * probable local binding - external_ids:iface-id is - * set, but the corresponding Port_Binding row is not - * created or is not visible to the local ovn-controller - * instance. - * - * * a local binding - external_ids:iface-id is set and - * which is already bound to the corresponding Port_Binding - * row. - * - * It maintains a list of children - * (of type BT_CONTAINER/BT_VIRTUAL) if any. - * - * BT_CONTAINER: Represents a local binding which has a parent of type - * BT_VIF. 
Its Port_Binding row's 'parent' column is set to - * its parent's Port_Binding. It shares the OVS interface row - * with the parent. - * Each ovn-controller when it sees a container Port_Binding, - * it creates 'struct local_binding' for the parent - * Port_Binding and for its even if the OVS interface row for - * the parent is not present. - * - * BT_VIRTUAL: Represents a local binding which has a parent of type BT_VIF. - * Its Port_Binding type is "virtual" and it shares the OVS - * interface row with the parent. - * Port_Binding of type "virtual" is claimed by pinctrl module - * when it sees the ARP packet from the parent's VIF. - * + * struct local_binding has 3 main fields: + * - name : 'external_ids:iface-id' of the OVS interface (key). + * - OVS interface row object. + * - List of 'binding_lport' objects with the primary lport + * in the front of the list (if present). * * An object of 'struct local_binding' is created: - * - For each interface that has iface-id configured with the type - BT_VIF. - * - * - For each container Port Binding (of type BT_CONTAINER) and its - * parent Port_Binding (of type BT_VIF), no matter if - * they are bound to this chassis i.e even if OVS interface row for the - * parent is not present. + * - For each interface that has external_ids:iface-id configured. * - * - For each 'virtual' Port Binding (of type BT_VIRTUAL) provided its parent - * is bound to this chassis. + * - For each port binding (also referred as lport) of type 'LP_VIF' + * if it is a parent lport of container lports even if there is no + * corresponding OVS interface. 
*/ +struct local_binding { + char *name; + const struct ovsrec_interface *iface; + struct ovs_list binding_lports; +}; -static struct local_binding * -local_binding_create(const char *name, const struct ovsrec_interface *iface, - const struct sbrec_port_binding *pb, - enum local_binding_type type) -{ - struct local_binding *lbinding = xzalloc(sizeof *lbinding); - lbinding->name = xstrdup(name); - lbinding->type = type; - lbinding->pb = pb; - lbinding->iface = iface; - shash_init(&lbinding->children); - return lbinding; -} - -static void -local_binding_add(struct shash *local_bindings, struct local_binding *lbinding) -{ - shash_add(local_bindings, lbinding->name, lbinding); -} +/* This structure represents a logical port (or port binding) + * which is associated with 'struct local_binding'. + * + * An instance of 'struct binding_lport' is created for a logical port + * - If the OVS interface's iface-id corresponds to the logical port. + * - If it is a container or virtual logical port and its parent + * has a 'local binding'. + * + */ +struct binding_lport { + struct ovs_list list_node; /* Node in local_binding.binding_lports. 
*/ -static void -local_binding_destroy(struct local_binding *lbinding) -{ - local_bindings_destroy(&lbinding->children); + char *name; + const struct sbrec_port_binding *pb; + struct local_binding *lbinding; + enum en_lport_type type; +}; - free(lbinding->name); - free(lbinding); -} +static struct local_binding *local_binding_create( + const char *name, const struct ovsrec_interface *); +static void local_binding_add(struct shash *local_bindings, + struct local_binding *); +static struct local_binding *local_binding_find( + struct shash *local_bindings, const char *name); +static void local_binding_destroy(struct local_binding *, + struct shash *binding_lports); +static void local_binding_delete(struct local_binding *, + struct shash *local_bindings, + struct shash *binding_lports); +static struct binding_lport *local_binding_add_lport( + struct shash *binding_lports, + struct local_binding *, + const struct sbrec_port_binding *, + enum en_lport_type); +static struct binding_lport *local_binding_get_primary_lport( + struct local_binding *); +static bool local_binding_handle_stale_binding_lports( + struct local_binding *lbinding, struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out, struct hmap *qos_map); + +static struct binding_lport *binding_lport_create( + const struct sbrec_port_binding *, + struct local_binding *, enum en_lport_type); +static void binding_lport_destroy(struct binding_lport *); +static void binding_lport_delete(struct shash *binding_lports, + struct binding_lport *); +static void binding_lport_add(struct shash *binding_lports, + struct binding_lport *); +static struct binding_lport *binding_lport_find( + struct shash *binding_lports, const char *lport_name); +static const struct sbrec_port_binding *binding_lport_get_parent_pb( + struct binding_lport *b_lprt); +static struct binding_lport *binding_lport_check_and_cleanup( + struct binding_lport *, struct shash *b_lports); + +static char *get_lport_type_str(enum en_lport_type 
lport_type); void -local_bindings_init(struct shash *local_bindings) +local_binding_data_init(struct local_binding_data *lbinding_data) { - shash_init(local_bindings); + shash_init(&lbinding_data->bindings); + shash_init(&lbinding_data->lports); } void -local_bindings_destroy(struct shash *local_bindings) +local_binding_data_destroy(struct local_binding_data *lbinding_data) { struct shash_node *node, *next; - SHASH_FOR_EACH_SAFE (node, next, local_bindings) { + + SHASH_FOR_EACH_SAFE (node, next, &lbinding_data->lports) { + struct binding_lport *b_lport = node->data; + binding_lport_destroy(b_lport); + shash_delete(&lbinding_data->lports, node); + } + + SHASH_FOR_EACH_SAFE (node, next, &lbinding_data->bindings) { struct local_binding *lbinding = node->data; - local_binding_destroy(lbinding); - shash_delete(local_bindings, node); + local_binding_destroy(lbinding, &lbinding_data->lports); + shash_delete(&lbinding_data->bindings, node); } - shash_destroy(local_bindings); + shash_destroy(&lbinding_data->lports); + shash_destroy(&lbinding_data->bindings); } -static -void local_binding_delete(struct shash *local_bindings, - struct local_binding *lbinding) +const struct sbrec_port_binding * +local_binding_get_primary_pb(struct shash *local_bindings, const char *pb_name) { - shash_find_and_delete(local_bindings, lbinding->name); - local_binding_destroy(lbinding); -} + struct local_binding *lbinding = + local_binding_find(local_bindings, pb_name); + struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); -static void -local_binding_add_child(struct local_binding *lbinding, - struct local_binding *child) -{ - local_binding_add(&lbinding->children, child); - child->parent = lbinding; + return b_lport ? 
b_lport->pb : NULL; } -static struct local_binding * -local_binding_find_child(struct local_binding *lbinding, - const char *child_name) +void +binding_dump_local_bindings(struct local_binding_data *lbinding_data, + struct ds *out_data) { - return local_binding_find(&lbinding->children, child_name); -} + const struct shash_node **nodes; -static void -local_binding_delete_child(struct local_binding *lbinding, - struct local_binding *child) -{ - shash_find_and_delete(&lbinding->children, child->name); + nodes = shash_sort(&lbinding_data->bindings); + size_t n = shash_count(&lbinding_data->bindings); + + ds_put_cstr(out_data, "Local bindings:\n"); + for (size_t i = 0; i < n; i++) { + const struct shash_node *node = nodes[i]; + struct local_binding *lbinding = node->data; + size_t num_lports = ovs_list_size(&lbinding->binding_lports); + ds_put_format(out_data, "name: [%s], OVS interface name : [%s], " + "num binding lports : [%"PRIuSIZE"]\n", + lbinding->name, + lbinding->iface ? lbinding->iface->name : "NULL", + num_lports); + + if (num_lports) { + struct shash child_lports = SHASH_INITIALIZER(&child_lports); + struct binding_lport *primary_lport = NULL; + struct binding_lport *b_lport; + bool first_elem = true; + + LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { + if (first_elem && b_lport->type == LP_VIF) { + primary_lport = b_lport; + } else { + shash_add(&child_lports, b_lport->name, b_lport); + } + first_elem = false; + } + + if (primary_lport) { + ds_put_format(out_data, "primary lport : [%s]\n", + primary_lport->name); + } else { + ds_put_format(out_data, "no primary lport\n"); + } + + if (!shash_is_empty(&child_lports)) { + const struct shash_node **c_nodes = + shash_sort(&child_lports); + for (size_t j = 0; j < shash_count(&child_lports); j++) { + b_lport = c_nodes[j]->data; + ds_put_format(out_data, "child lport[%"PRIuSIZE"] : [%s], " + "type : [%s]\n", j + 1, b_lport->name, + get_lport_type_str(b_lport->type)); + } + free(c_nodes); + } + 
shash_destroy(&child_lports); + } + + ds_put_cstr(out_data, "----------------------------------------\n"); + } + + free(nodes); } static bool @@ -744,12 +807,6 @@ is_lport_vif(const struct sbrec_port_binding *pb) return !pb->type[0]; } -static bool -is_lport_container(const struct sbrec_port_binding *pb) -{ - return is_lport_vif(pb) && pb->parent_port && pb->parent_port[0]; -} - static struct tracked_binding_datapath * tracked_binding_datapath_create(const struct sbrec_datapath_binding *dp, bool is_new, @@ -818,26 +875,13 @@ binding_tracked_dp_destroy(struct hmap *tracked_datapaths) hmap_destroy(tracked_datapaths); } -/* Corresponds to each Port_Binding.type. */ -enum en_lport_type { - LP_UNKNOWN, - LP_VIF, - LP_PATCH, - LP_L3GATEWAY, - LP_LOCALNET, - LP_LOCALPORT, - LP_L2GATEWAY, - LP_VTEP, - LP_CHASSISREDIRECT, - LP_VIRTUAL, - LP_EXTERNAL, - LP_REMOTE -}; - static enum en_lport_type get_lport_type(const struct sbrec_port_binding *pb) { if (is_lport_vif(pb)) { + if (pb->parent_port && pb->parent_port[0]) { + return LP_CONTAINER; + } return LP_VIF; } else if (!strcmp(pb->type, "patch")) { return LP_PATCH; @@ -864,6 +908,41 @@ get_lport_type(const struct sbrec_port_binding *pb) return LP_UNKNOWN; } +static char * +get_lport_type_str(enum en_lport_type lport_type) +{ + switch (lport_type) { + case LP_VIF: + return "VIF"; + case LP_CONTAINER: + return "CONTAINER"; + case LP_VIRTUAL: + return "VIRTUAL"; + case LP_PATCH: + return "PATCH"; + case LP_CHASSISREDIRECT: + return "CHASSISREDIRECT"; + case LP_L3GATEWAY: + return "L3GATEWAY"; + case LP_LOCALNET: + return "LOCALNET"; + case LP_LOCALPORT: + return "LOCALPORT"; + case LP_L2GATEWAY: + return "L2GATEWAY"; + case LP_EXTERNAL: + return "EXTERNAL"; + case LP_REMOTE: + return "REMOTE"; + case LP_VTEP: + return "VTEP"; + case LP_UNKNOWN: + return "UNKNOWN"; + } + + OVS_NOT_REACHED(); +} + /* For newly claimed ports, if 'notify_up' is 'false': * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'. 
* - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for @@ -991,14 +1070,15 @@ release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, static bool is_lbinding_set(struct local_binding *lbinding) { - return lbinding && lbinding->pb && lbinding->iface; + return lbinding && lbinding->iface; } static bool -is_lbinding_this_chassis(struct local_binding *lbinding, - const struct sbrec_chassis *chassis) +is_binding_lport_this_chassis(struct binding_lport *b_lport, + const struct sbrec_chassis *chassis) { - return lbinding && lbinding->pb && lbinding->pb->chassis == chassis; + return (b_lport && b_lport->pb && chassis && + b_lport->pb->chassis == chassis); } static bool @@ -1010,15 +1090,14 @@ can_bind_on_this_chassis(const struct sbrec_chassis *chassis_rec, || !strcmp(requested_chassis, chassis_rec->hostname); } -/* Returns 'true' if the 'lbinding' has children of type BT_CONTAINER, +/* Returns 'true' if the 'lbinding' has binding lports of type LP_CONTAINER, * 'false' otherwise. 
*/ static bool is_lbinding_container_parent(struct local_binding *lbinding) { - struct shash_node *node; - SHASH_FOR_EACH (node, &lbinding->children) { - struct local_binding *l = node->data; - if (l->type == BT_CONTAINER) { + struct binding_lport *b_lport; + LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { + if (b_lport->type == LP_CONTAINER) { return true; } } @@ -1027,66 +1106,41 @@ is_lbinding_container_parent(struct local_binding *lbinding) } static bool -release_local_binding_children(const struct sbrec_chassis *chassis_rec, - struct local_binding *lbinding, - bool sb_readonly, - struct hmap *tracked_dp_bindings) -{ - struct shash_node *node; - SHASH_FOR_EACH (node, &lbinding->children) { - struct local_binding *l = node->data; - if (is_lbinding_this_chassis(l, chassis_rec)) { - if (!release_lport(l->pb, sb_readonly, tracked_dp_bindings)) { - return false; - } +release_binding_lport(const struct sbrec_chassis *chassis_rec, + struct binding_lport *b_lport, bool sb_readonly, + struct binding_ctx_out *b_ctx_out) +{ + if (is_binding_lport_this_chassis(b_lport, chassis_rec)) { + remove_local_lport_ids(b_lport->pb, b_ctx_out); + if (!release_lport(b_lport->pb, sb_readonly, + b_ctx_out->tracked_dp_bindings)) { + return false; } - - /* Clear the local bindings' 'iface'. 
*/ - l->iface = NULL; } return true; } -static bool -release_local_binding(const struct sbrec_chassis *chassis_rec, - struct local_binding *lbinding, bool sb_readonly, - struct hmap *tracked_dp_bindings) -{ - if (!release_local_binding_children(chassis_rec, lbinding, - sb_readonly, tracked_dp_bindings)) { - return false; - } - - bool retval = true; - if (is_lbinding_this_chassis(lbinding, chassis_rec)) { - retval = release_lport(lbinding->pb, sb_readonly, tracked_dp_bindings); - } - - lbinding->pb = NULL; - lbinding->iface = NULL; - return retval; -} - static bool consider_vif_lport_(const struct sbrec_port_binding *pb, bool can_bind, const char *vif_chassis, struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out, - struct local_binding *lbinding, + struct binding_lport *b_lport, struct hmap *qos_map) { - bool lbinding_set = is_lbinding_set(lbinding); + bool lbinding_set = b_lport && is_lbinding_set(b_lport->lbinding); + if (lbinding_set) { if (can_bind) { /* We can claim the lport. */ const struct sbrec_port_binding *parent_pb = - lbinding->parent ? 
lbinding->parent->pb : NULL; + binding_lport_get_parent_pb(b_lport); if (!claim_lport(pb, parent_pb, b_ctx_in->chassis_rec, - lbinding->iface, !b_ctx_in->ovnsb_idl_txn, - !lbinding->parent, - b_ctx_out->tracked_dp_bindings)){ + b_lport->lbinding->iface, + !b_ctx_in->ovnsb_idl_txn, + !parent_pb, b_ctx_out->tracked_dp_bindings)){ return false; } @@ -1098,7 +1152,7 @@ consider_vif_lport_(const struct sbrec_port_binding *pb, b_ctx_out->tracked_dp_bindings); update_local_lport_ids(pb, b_ctx_out); update_local_lports(pb->logical_port, b_ctx_out); - if (lbinding->iface && qos_map && b_ctx_in->ovs_idl_txn) { + if (b_lport->lbinding->iface && qos_map && b_ctx_in->ovs_idl_txn) { get_qos_params(pb, qos_map); } } else { @@ -1136,16 +1190,19 @@ consider_vif_lport(const struct sbrec_port_binding *pb, vif_chassis); if (!lbinding) { - lbinding = local_binding_find(b_ctx_out->local_bindings, + lbinding = local_binding_find(&b_ctx_out->lbinding_data->bindings, pb->logical_port); } + struct binding_lport *b_lport = NULL; if (lbinding) { - lbinding->pb = pb; + struct shash *binding_lports = + &b_ctx_out->lbinding_data->lports; + b_lport = local_binding_add_lport(binding_lports, lbinding, pb, LP_VIF); } return consider_vif_lport_(pb, can_bind, vif_chassis, b_ctx_in, - b_ctx_out, lbinding, qos_map); + b_ctx_out, b_lport, qos_map); } static bool @@ -1154,9 +1211,9 @@ consider_container_lport(const struct sbrec_port_binding *pb, struct binding_ctx_out *b_ctx_out, struct hmap *qos_map) { + struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; struct local_binding *parent_lbinding; - parent_lbinding = local_binding_find(b_ctx_out->local_bindings, - pb->parent_port); + parent_lbinding = local_binding_find(local_bindings, pb->parent_port); if (!parent_lbinding) { /* There is no local_binding for parent port. Create it @@ -1171,54 +1228,61 @@ consider_container_lport(const struct sbrec_port_binding *pb, * we want the these container ports also be claimed by the * chassis. 
* */ - parent_lbinding = local_binding_create(pb->parent_port, NULL, NULL, - BT_VIF); - local_binding_add(b_ctx_out->local_bindings, parent_lbinding); + parent_lbinding = local_binding_create(pb->parent_port, NULL); + local_binding_add(local_bindings, parent_lbinding); } - struct local_binding *container_lbinding = - local_binding_find_child(parent_lbinding, pb->logical_port); + struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; + struct binding_lport *container_b_lport = + local_binding_add_lport(binding_lports, parent_lbinding, pb, + LP_CONTAINER); - if (!container_lbinding) { - container_lbinding = local_binding_create(pb->logical_port, - parent_lbinding->iface, - pb, BT_CONTAINER); - local_binding_add_child(parent_lbinding, container_lbinding); - } else { - ovs_assert(container_lbinding->type == BT_CONTAINER); - container_lbinding->pb = pb; - container_lbinding->iface = parent_lbinding->iface; - } + struct binding_lport *parent_b_lport = + binding_lport_find(binding_lports, pb->parent_port); - if (!parent_lbinding->pb) { - parent_lbinding->pb = lport_lookup_by_name( + bool can_consider_c_lport = true; + if (!parent_b_lport || !parent_b_lport->pb) { + const struct sbrec_port_binding *parent_pb = lport_lookup_by_name( b_ctx_in->sbrec_port_binding_by_name, pb->parent_port); - if (parent_lbinding->pb) { + if (parent_pb && get_lport_type(parent_pb) == LP_VIF) { /* Its possible that the parent lport is not considered yet. * So call consider_vif_lport() to process it first. */ - consider_vif_lport(parent_lbinding->pb, b_ctx_in, b_ctx_out, + consider_vif_lport(parent_pb, b_ctx_in, b_ctx_out, parent_lbinding, qos_map); + parent_b_lport = binding_lport_find(binding_lports, + pb->parent_port); } else { - /* The parent lport doesn't exist. Call release_lport() to - * release the container lport, if it was bound earlier. 
*/ - if (is_lbinding_this_chassis(container_lbinding, - b_ctx_in->chassis_rec)) { - return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, - b_ctx_out->tracked_dp_bindings); - } + /* The parent lport doesn't exist. Cannot consider the container + * lport for binding. */ + can_consider_c_lport = false; + } + } - return true; + if (parent_b_lport && parent_b_lport->type != LP_VIF) { + can_consider_c_lport = false; + } + + if (!can_consider_c_lport) { + /* Call release_lport() to release the container lport, + * if it was bound earlier. */ + if (is_binding_lport_this_chassis(container_b_lport, + b_ctx_in->chassis_rec)) { + return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, + b_ctx_out->tracked_dp_bindings); } + + return true; } - const char *vif_chassis = smap_get(&parent_lbinding->pb->options, + ovs_assert(parent_b_lport && parent_b_lport->pb); + const char *vif_chassis = smap_get(&parent_b_lport->pb->options, "requested-chassis"); bool can_bind = can_bind_on_this_chassis(b_ctx_in->chassis_rec, vif_chassis); return consider_vif_lport_(pb, can_bind, vif_chassis, b_ctx_in, b_ctx_out, - container_lbinding, qos_map); + container_b_lport, qos_map); } static bool @@ -1227,46 +1291,58 @@ consider_virtual_lport(const struct sbrec_port_binding *pb, struct binding_ctx_out *b_ctx_out, struct hmap *qos_map) { - struct local_binding * parent_lbinding = - pb->virtual_parent ? local_binding_find(b_ctx_out->local_bindings, + struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; + struct local_binding *parent_lbinding = + pb->virtual_parent ? local_binding_find(local_bindings, pb->virtual_parent) : NULL; - if (parent_lbinding && !parent_lbinding->pb) { - parent_lbinding->pb = lport_lookup_by_name( - b_ctx_in->sbrec_port_binding_by_name, pb->virtual_parent); - - if (parent_lbinding->pb) { - /* Its possible that the parent lport is not considered yet. - * So call consider_vif_lport() to process it first. 
*/ - consider_vif_lport(parent_lbinding->pb, b_ctx_in, b_ctx_out, - parent_lbinding, qos_map); - } - } - + struct binding_lport *virtual_b_lport = NULL; /* Unlike container lports, we don't have to create parent_lbinding if * it is NULL. This is because, if parent_lbinding is not present, it * means the virtual port can't bind in this chassis. * Note: pinctrl module binds the virtual lport when it sees ARP * packet from the parent lport. */ - struct local_binding *virtual_lbinding = NULL; - if (is_lbinding_this_chassis(parent_lbinding, b_ctx_in->chassis_rec)) { - virtual_lbinding = - local_binding_find_child(parent_lbinding, pb->logical_port); - if (!virtual_lbinding) { - virtual_lbinding = local_binding_create(pb->logical_port, - parent_lbinding->iface, - pb, BT_VIRTUAL); - local_binding_add_child(parent_lbinding, virtual_lbinding); - } else { - ovs_assert(virtual_lbinding->type == BT_VIRTUAL); - virtual_lbinding->pb = pb; - virtual_lbinding->iface = parent_lbinding->iface; + if (parent_lbinding) { + struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; + + struct binding_lport *parent_b_lport = + binding_lport_find(binding_lports, pb->virtual_parent); + + if (!parent_b_lport || !parent_b_lport->pb) { + const struct sbrec_port_binding *parent_pb = lport_lookup_by_name( + b_ctx_in->sbrec_port_binding_by_name, pb->virtual_parent); + + if (parent_pb && get_lport_type(parent_pb) == LP_VIF) { + /* Its possible that the parent lport is not considered yet. + * So call consider_vif_lport() to process it first. 
*/ + consider_vif_lport(parent_pb, b_ctx_in, b_ctx_out, + parent_lbinding, qos_map); + } + } + + parent_b_lport = local_binding_get_primary_lport(parent_lbinding); + if (is_binding_lport_this_chassis(parent_b_lport, + b_ctx_in->chassis_rec)) { + virtual_b_lport = + local_binding_add_lport(binding_lports, parent_lbinding, pb, + LP_VIRTUAL); } } - return consider_vif_lport_(pb, true, NULL, b_ctx_in, b_ctx_out, - virtual_lbinding, qos_map); + if (!consider_vif_lport_(pb, true, NULL, b_ctx_in, b_ctx_out, + virtual_b_lport, qos_map)) { + return false; + } + + /* If the virtual lport is not bound to this chassis, then remove + * its entry from the local_lport_ids if present. This is required + * when a virtual port moves from one chassis to other.*/ + if (!virtual_b_lport) { + remove_local_lport_ids(pb, b_ctx_out); + } + + return true; } /* Considers either claiming the lport or releasing the lport @@ -1407,6 +1483,8 @@ build_local_bindings(struct binding_ctx_in *b_ctx_in, continue; } + struct shash *local_bindings = + &b_ctx_out->lbinding_data->bindings; for (j = 0; j < port_rec->n_interfaces; j++) { const struct ovsrec_interface *iface_rec; @@ -1416,11 +1494,10 @@ build_local_bindings(struct binding_ctx_in *b_ctx_in, if (iface_id && ofport > 0) { struct local_binding *lbinding = - local_binding_find(b_ctx_out->local_bindings, iface_id); + local_binding_find(local_bindings, iface_id); if (!lbinding) { - lbinding = local_binding_create(iface_id, iface_rec, NULL, - BT_VIF); - local_binding_add(b_ctx_out->local_bindings, lbinding); + lbinding = local_binding_create(iface_id, iface_rec); + local_binding_add(local_bindings, lbinding); } else { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -1431,7 +1508,6 @@ build_local_bindings(struct binding_ctx_in *b_ctx_in, "configuration on interface [%s]", lbinding->iface->name, iface_rec->name, iface_rec->name); - ovs_assert(lbinding->type == BT_VIF); } update_local_lports(iface_id, b_ctx_out); @@ -1494,11 +1570,11 
@@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) break; case LP_VIF: - if (is_lport_container(pb)) { - consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr); - } else { - consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map_ptr); - } + consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map_ptr); + break; + + case LP_CONTAINER: + consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr); break; case LP_VIRTUAL: @@ -1799,39 +1875,44 @@ consider_iface_claim(const struct ovsrec_interface *iface_rec, update_local_lports(iface_id, b_ctx_out); smap_replace(b_ctx_out->local_iface_ids, iface_rec->name, iface_id); - struct local_binding *lbinding = - local_binding_find(b_ctx_out->local_bindings, iface_id); + struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; + struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; + struct local_binding *lbinding = local_binding_find(local_bindings, + iface_id); if (!lbinding) { - lbinding = local_binding_create(iface_id, iface_rec, NULL, BT_VIF); - local_binding_add(b_ctx_out->local_bindings, lbinding); + lbinding = local_binding_create(iface_id, iface_rec); + local_binding_add(local_bindings, lbinding); } else { lbinding->iface = iface_rec; } - if (!lbinding->pb || strcmp(lbinding->name, lbinding->pb->logical_port)) { - lbinding->pb = lport_lookup_by_name( - b_ctx_in->sbrec_port_binding_by_name, lbinding->name); - if (lbinding->pb && !strcmp(lbinding->pb->type, "virtual")) { - lbinding->pb = NULL; + struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); + const struct sbrec_port_binding *pb = NULL; + if (!b_lport) { + pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, + lbinding->name); + if (pb && get_lport_type(pb) == LP_VIF) { + b_lport = local_binding_add_lport(binding_lports, lbinding, pb, + LP_VIF); } } - if (lbinding->pb) { - if (!consider_vif_lport(lbinding->pb, b_ctx_in, b_ctx_out, - lbinding, qos_map)) { - return 
false; - } + if (!b_lport) { + /* There is no binding lport for this local binding. */ + return true; + } + + if (!consider_vif_lport(b_lport->pb, b_ctx_in, b_ctx_out, + lbinding, qos_map)) { + return false; } /* Update the child local_binding's iface (if any children) and try to * claim the container lbindings. */ - struct shash_node *node; - SHASH_FOR_EACH (node, &lbinding->children) { - struct local_binding *child = node->data; - child->iface = iface_rec; - if (child->type == BT_CONTAINER) { - if (!consider_container_lport(child->pb, b_ctx_in, b_ctx_out, + LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { + if (b_lport->type == LP_CONTAINER) { + if (!consider_container_lport(b_lport->pb, b_ctx_in, b_ctx_out, qos_map)) { return false; } @@ -1862,32 +1943,42 @@ consider_iface_release(const struct ovsrec_interface *iface_rec, struct binding_ctx_out *b_ctx_out) { struct local_binding *lbinding; - lbinding = local_binding_find(b_ctx_out->local_bindings, - iface_id); - if (is_lbinding_this_chassis(lbinding, b_ctx_in->chassis_rec)) { + struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; + struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; + + lbinding = local_binding_find(local_bindings, iface_id); + struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding); + if (is_binding_lport_this_chassis(b_lport, b_ctx_in->chassis_rec)) { struct local_datapath *ld = get_local_datapath(b_ctx_out->local_datapaths, - lbinding->pb->datapath->tunnel_key); + b_lport->pb->datapath->tunnel_key); if (ld) { - remove_pb_from_local_datapath(lbinding->pb, - b_ctx_in->chassis_rec, - b_ctx_out, ld); + remove_pb_from_local_datapath(b_lport->pb, + b_ctx_in->chassis_rec, + b_ctx_out, ld); } - /* Note: release_local_binding() resets lbinding->pb and - * lbinding->iface. - * Cannot access these members of lbinding after this call. 
*/ - if (!release_local_binding(b_ctx_in->chassis_rec, lbinding, - !b_ctx_in->ovnsb_idl_txn, - b_ctx_out->tracked_dp_bindings)) { - return false; + /* Release the primary binding lport and other children lports if + * any. */ + LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { + if (!release_binding_lport(b_ctx_in->chassis_rec, b_lport, + !b_ctx_in->ovnsb_idl_txn, + b_ctx_out)) { + return false; + } } + + } + + if (lbinding) { + /* Clear the iface of the local binding. */ + lbinding->iface = NULL; } /* Check if the lbinding has children of type PB_CONTAINER. * If so, don't delete the local_binding. */ if (lbinding && !is_lbinding_container_parent(lbinding)) { - local_binding_delete(b_ctx_out->local_bindings, lbinding); + local_binding_delete(lbinding, local_bindings, binding_lports); } remove_local_lports(iface_id, b_ctx_out); @@ -2088,56 +2179,35 @@ handle_deleted_lport(const struct sbrec_port_binding *pb, } } -static struct local_binding * -get_lbinding_for_lport(const struct sbrec_port_binding *pb, - enum en_lport_type lport_type, - struct binding_ctx_out *b_ctx_out) -{ - ovs_assert(lport_type == LP_VIF || lport_type == LP_VIRTUAL); - - if (lport_type == LP_VIF && !is_lport_container(pb)) { - return local_binding_find(b_ctx_out->local_bindings, pb->logical_port); - } - - struct local_binding *parent_lbinding = NULL; - - if (lport_type == LP_VIRTUAL) { - if (pb->virtual_parent) { - parent_lbinding = local_binding_find(b_ctx_out->local_bindings, - pb->virtual_parent); - } - } else { - if (pb->parent_port) { - parent_lbinding = local_binding_find(b_ctx_out->local_bindings, - pb->parent_port); - } - } - - return parent_lbinding - ? 
local_binding_find(&parent_lbinding->children, pb->logical_port) - : NULL; -} - static bool handle_deleted_vif_lport(const struct sbrec_port_binding *pb, enum en_lport_type lport_type, struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) { - struct local_binding *lbinding = - get_lbinding_for_lport(pb, lport_type, b_ctx_out); + struct local_binding *lbinding = NULL; + bool bound = false; - if (lbinding) { - lbinding->pb = NULL; - /* The port_binding 'pb' is deleted. So there is no need to - * clear the 'chassis' column of 'pb'. But we need to do - * for the local_binding's children. */ - if (lbinding->type == BT_VIF && - !release_local_binding_children( - b_ctx_in->chassis_rec, lbinding, - !b_ctx_in->ovnsb_idl_txn, - b_ctx_out->tracked_dp_bindings)) { - return false; + struct shash *binding_lports = &b_ctx_out->lbinding_data->lports; + struct binding_lport *b_lport = binding_lport_find(binding_lports, pb->logical_port); + if (b_lport) { + lbinding = b_lport->lbinding; + bound = is_binding_lport_this_chassis(b_lport, b_ctx_in->chassis_rec); + + /* Remove b_lport from local_binding. */ + binding_lport_delete(binding_lports, b_lport); + } + + if (bound && lbinding && lport_type == LP_VIF) { + /* We need to release the container/virtual binding lports (if any) if + * deleted 'pb' type is LP_VIF. */ + struct binding_lport *c_lport; + LIST_FOR_EACH (c_lport, list_node, &lbinding->binding_lports) { + if (!release_binding_lport(b_ctx_in->chassis_rec, c_lport, + !b_ctx_in->ovnsb_idl_txn, + b_ctx_out)) { + return false; + } } } @@ -2147,18 +2217,8 @@ handle_deleted_vif_lport(const struct sbrec_port_binding *pb, * it from local_lports if there is a VIF entry. * consider_iface_release() takes care of removing from the local_lports * when the interface change happens. 
*/ - if (is_lport_container(pb)) { + if (lport_type == LP_CONTAINER) { remove_local_lports(pb->logical_port, b_ctx_out); - - /* If the container port is removed we should also remove it from - * its parent's children set. - */ - if (lbinding) { - if (lbinding->parent) { - local_binding_delete_child(lbinding->parent, lbinding); - } - local_binding_destroy(lbinding); - } } handle_deleted_lport(pb, b_ctx_in, b_ctx_out); @@ -2177,7 +2237,7 @@ handle_updated_vif_lport(const struct sbrec_port_binding *pb, if (lport_type == LP_VIRTUAL) { handled = consider_virtual_lport(pb, b_ctx_in, b_ctx_out, qos_map); - } else if (lport_type == LP_VIF && is_lport_container(pb)) { + } else if (lport_type == LP_CONTAINER) { handled = consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map); } else { handled = consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map); @@ -2189,14 +2249,14 @@ handle_updated_vif_lport(const struct sbrec_port_binding *pb, bool now_claimed = (pb->chassis == b_ctx_in->chassis_rec); - if (lport_type == LP_VIRTUAL || - (lport_type == LP_VIF && is_lport_container(pb)) || + if (lport_type == LP_VIRTUAL || lport_type == LP_CONTAINER || claimed == now_claimed) { return true; } - struct local_binding *lbinding = - local_binding_find(b_ctx_out->local_bindings, pb->logical_port); + struct shash *local_bindings = &b_ctx_out->lbinding_data->bindings; + struct local_binding *lbinding = local_binding_find(local_bindings, + pb->logical_port); /* If the ovs port backing this binding previously was removed in the * meantime, we won't have a local_binding for it. 
@@ -2206,12 +2266,11 @@ handle_updated_vif_lport(const struct sbrec_port_binding *pb, return true; } - struct shash_node *node; - SHASH_FOR_EACH (node, &lbinding->children) { - struct local_binding *child = node->data; - if (child->type == BT_CONTAINER) { - handled = consider_container_lport(child->pb, b_ctx_in, b_ctx_out, - qos_map); + struct binding_lport *b_lport; + LIST_FOR_EACH (b_lport, list_node, &lbinding->binding_lports) { + if (b_lport->type == LP_CONTAINER) { + handled = consider_container_lport(b_lport->pb, b_ctx_in, + b_ctx_out, qos_map); if (!handled) { return false; } @@ -2256,12 +2315,25 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, enum en_lport_type lport_type = get_lport_type(pb); - if (lport_type == LP_VIF) { - if (is_lport_container(pb)) { - shash_add(&deleted_container_pbs, pb->logical_port, pb); - } else { - shash_add(&deleted_vif_pbs, pb->logical_port, pb); + struct binding_lport *b_lport = + binding_lport_find(&b_ctx_out->lbinding_data->lports, + pb->logical_port); + if (b_lport) { + /* If the 'b_lport->type' and 'lport_type' don't match, then update + * the b_lport->type to the updated 'lport_type'. The function + * binding_lport_check_and_cleanup() will cleanup the 'b_lport' + * if required. 
*/ + if (b_lport->type != lport_type) { + b_lport->type = lport_type; } + b_lport = binding_lport_check_and_cleanup( + b_lport, &b_ctx_out->lbinding_data->lports); + } + + if (lport_type == LP_VIF) { + shash_add(&deleted_vif_pbs, pb->logical_port, pb); + } else if (lport_type == LP_CONTAINER) { + shash_add(&deleted_container_pbs, pb->logical_port, pb); } else if (lport_type == LP_VIRTUAL) { shash_add(&deleted_virtual_pbs, pb->logical_port, pb); } else { @@ -2272,7 +2344,7 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, struct shash_node *node; struct shash_node *node_next; SHASH_FOR_EACH_SAFE (node, node_next, &deleted_container_pbs) { - handled = handle_deleted_vif_lport(node->data, LP_VIF, b_ctx_in, + handled = handle_deleted_vif_lport(node->data, LP_CONTAINER, b_ctx_in, b_ctx_out); shash_delete(&deleted_container_pbs, node); if (!handled) { @@ -2326,12 +2398,33 @@ delete_done: enum en_lport_type lport_type = get_lport_type(pb); + struct binding_lport *b_lport = + binding_lport_find(&b_ctx_out->lbinding_data->lports, + pb->logical_port); + if (b_lport) { + ovs_assert(b_lport->pb == pb); + + if (b_lport->type != lport_type) { + b_lport->type = lport_type; + } + + if (b_lport->lbinding) { + handled = local_binding_handle_stale_binding_lports( + b_lport->lbinding, b_ctx_in, b_ctx_out, qos_map_ptr); + if (!handled) { + /* Backout from the handling. */ + break; + } + } + } + struct local_datapath *ld = get_local_datapath(b_ctx_out->local_datapaths, pb->datapath->tunnel_key); switch (lport_type) { case LP_VIF: + case LP_CONTAINER: case LP_VIRTUAL: handled = handle_updated_vif_lport(pb, lport_type, b_ctx_in, b_ctx_out, qos_map_ptr); @@ -2468,11 +2561,11 @@ binding_init(void) * available. 
*/ void -binding_seqno_run(struct shash *local_bindings) +binding_seqno_run(struct local_binding_data *lbinding_data) { const char *iface_id; const char *iface_id_next; - + struct shash *local_bindings = &lbinding_data->bindings; SSET_FOR_EACH_SAFE (iface_id, iface_id_next, &binding_iface_released_set) { struct shash_node *lb_node = shash_find(local_bindings, iface_id); @@ -2508,16 +2601,17 @@ binding_seqno_run(struct shash *local_bindings) * If so, then this is a newly bound interface, make sure we reset the * Port_Binding 'up' field and the OVS Interface 'external-id'. */ - if (lb && lb->pb && lb->iface) { + struct binding_lport *b_lport = local_binding_get_primary_lport(lb); + if (lb && b_lport && lb->iface) { new_ifaces = true; if (smap_get(&lb->iface->external_ids, OVN_INSTALLED_EXT_ID)) { ovsrec_interface_update_external_ids_delkey( lb->iface, OVN_INSTALLED_EXT_ID); } - if (lb->pb->n_up) { + if (b_lport->pb->n_up) { bool up = false; - sbrec_port_binding_set_up(lb->pb, &up, 1); + sbrec_port_binding_set_up(b_lport->pb, &up, 1); } simap_put(&binding_iface_seqno_map, lb->name, new_seqno); } @@ -2542,12 +2636,13 @@ binding_seqno_run(struct shash *local_bindings) * available. 
*/ void -binding_seqno_install(struct shash *local_bindings) +binding_seqno_install(struct local_binding_data *lbinding_data) { struct ofctrl_acked_seqnos *acked_seqnos = ofctrl_acked_seqnos_get(binding_seq_type_pb_cfg); struct simap_node *node; struct simap_node *node_next; + struct shash *local_bindings = &lbinding_data->bindings; SIMAP_FOR_EACH_SAFE (node, node_next, &binding_iface_seqno_map) { struct shash_node *lb_node = shash_find(local_bindings, node->name); @@ -2557,7 +2652,8 @@ binding_seqno_install(struct shash *local_bindings) } struct local_binding *lb = lb_node->data; - if (!lb->pb || !lb->iface) { + struct binding_lport *b_lport = local_binding_get_primary_lport(lb); + if (!b_lport || !lb->iface) { goto del_seqno; } @@ -2568,14 +2664,12 @@ binding_seqno_install(struct shash *local_bindings) ovsrec_interface_update_external_ids_setkey(lb->iface, OVN_INSTALLED_EXT_ID, "true"); - if (lb->pb->n_up) { + if (b_lport->pb->n_up) { bool up = true; - sbrec_port_binding_set_up(lb->pb, &up, 1); - struct shash_node *child_node; - SHASH_FOR_EACH (child_node, &lb->children) { - struct local_binding *lb_child = child_node->data; - sbrec_port_binding_set_up(lb_child->pb, &up, 1); + sbrec_port_binding_set_up(b_lport->pb, &up, 1); + LIST_FOR_EACH (b_lport, list_node, &lb->binding_lports) { + sbrec_port_binding_set_up(b_lport->pb, &up, 1); } } @@ -2591,3 +2685,305 @@ binding_seqno_flush(void) { simap_clear(&binding_iface_seqno_map); } + +/* Static functions for local_binding and binding_lport.
*/ +static struct local_binding * +local_binding_create(const char *name, const struct ovsrec_interface *iface) +{ + struct local_binding *lbinding = xzalloc(sizeof *lbinding); + lbinding->name = xstrdup(name); + lbinding->iface = iface; + ovs_list_init(&lbinding->binding_lports); + + return lbinding; +} + +static struct local_binding * +local_binding_find(struct shash *local_bindings, const char *name) +{ + return shash_find_data(local_bindings, name); +} + +static void +local_binding_add(struct shash *local_bindings, struct local_binding *lbinding) +{ + shash_add(local_bindings, lbinding->name, lbinding); +} + +static void +local_binding_destroy(struct local_binding *lbinding, + struct shash *binding_lports) +{ + struct binding_lport *b_lport; + LIST_FOR_EACH_POP (b_lport, list_node, &lbinding->binding_lports) { + b_lport->lbinding = NULL; + binding_lport_delete(binding_lports, b_lport); + } + + free(lbinding->name); + free(lbinding); +} + +static void +local_binding_delete(struct local_binding *lbinding, + struct shash *local_bindings, + struct shash *binding_lports) +{ + shash_find_and_delete(local_bindings, lbinding->name); + local_binding_destroy(lbinding, binding_lports); +} + +/* Returns the primary binding lport if present in lbinding's + * binding lports list. A binding lport is considered primary + * if binding lport's type is LP_VIF and the name matches + * with the 'lbinding'. 
+ */ +static struct binding_lport * +local_binding_get_primary_lport(struct local_binding *lbinding) +{ + if (!lbinding) { + return NULL; + } + + if (!ovs_list_is_empty(&lbinding->binding_lports)) { + struct binding_lport *b_lport = NULL; + b_lport = CONTAINER_OF(ovs_list_front(&lbinding->binding_lports), + struct binding_lport, list_node); + + if (b_lport->type == LP_VIF && + !strcmp(lbinding->name, b_lport->name)) { + return b_lport; + } + } + + return NULL; +} + +static struct binding_lport * +local_binding_add_lport(struct shash *binding_lports, + struct local_binding *lbinding, + const struct sbrec_port_binding *pb, + enum en_lport_type b_type) +{ + struct binding_lport *b_lport = + binding_lport_find(binding_lports, pb->logical_port); + bool add_to_lport_list = false; + if (!b_lport) { + b_lport = binding_lport_create(pb, lbinding, b_type); + binding_lport_add(binding_lports, b_lport); + add_to_lport_list = true; + } else if (b_lport->lbinding != lbinding) { + add_to_lport_list = true; + if (!ovs_list_is_empty(&b_lport->list_node)) { + ovs_list_remove(&b_lport->list_node); + } + b_lport->lbinding = lbinding; + b_lport->type = b_type; + } + + if (add_to_lport_list) { + if (b_type == LP_VIF) { + ovs_list_push_front(&lbinding->binding_lports, &b_lport->list_node); + } else { + ovs_list_push_back(&lbinding->binding_lports, &b_lport->list_node); + } + } + + return b_lport; +} + +/* This function handles the stale binding lports of 'lbinding' if 'lbinding' + * doesn't have a primary binding lport. + */ +static bool +local_binding_handle_stale_binding_lports(struct local_binding *lbinding, + struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out, + struct hmap *qos_map) +{ + /* Check if this lbinding has a primary binding_lport or not. */ + struct binding_lport *p_lport = local_binding_get_primary_lport(lbinding); + if (p_lport) { + /* Nothing to be done. 
*/ + return true; + } + + bool handled = true; + struct binding_lport *b_lport, *next; + const struct sbrec_port_binding *pb; + LIST_FOR_EACH_SAFE (b_lport, next, list_node, &lbinding->binding_lports) { + /* Get the lport type again from the pb. It's possible that the + * pb type has changed. */ + enum en_lport_type pb_lport_type = get_lport_type(b_lport->pb); + if (b_lport->type == LP_VIRTUAL && pb_lport_type == LP_VIRTUAL) { + pb = b_lport->pb; + binding_lport_delete(&b_ctx_out->lbinding_data->lports, + b_lport); + handled = consider_virtual_lport(pb, b_ctx_in, b_ctx_out, qos_map); + } else if (b_lport->type == LP_CONTAINER && + pb_lport_type == LP_CONTAINER) { + /* For container lport, binding_lport is preserved so that when + * the parent port is created, it can be considered. + * consider_container_lport() creates the binding_lport for the parent + * port (with iface set to NULL). */ + handled = consider_container_lport(b_lport->pb, b_ctx_in, b_ctx_out, qos_map); + } else { + /* This can happen when the lport type changes from one type + * to another. E.g. from normal lport to external. Release the + * lport if it was claimed earlier and delete the b_lport.
*/ + handled = release_binding_lport(b_ctx_in->chassis_rec, b_lport, + !b_ctx_in->ovnsb_idl_txn, + b_ctx_out); + binding_lport_delete(&b_ctx_out->lbinding_data->lports, + b_lport); + } + + if (!handled) { + return false; + } + } + + return handled; +} + +static struct binding_lport * +binding_lport_create(const struct sbrec_port_binding *pb, + struct local_binding *lbinding, + enum en_lport_type type) +{ + struct binding_lport *b_lport = xzalloc(sizeof *b_lport); + b_lport->name = xstrdup(pb->logical_port); + b_lport->pb = pb; + b_lport->type = type; + b_lport->lbinding = lbinding; + ovs_list_init(&b_lport->list_node); + + return b_lport; +} + +static void +binding_lport_add(struct shash *binding_lports, struct binding_lport *b_lport) +{ + shash_add(binding_lports, b_lport->pb->logical_port, b_lport); +} + +static struct binding_lport * +binding_lport_find(struct shash *binding_lports, const char *lport_name) +{ + if (!lport_name) { + return NULL; + } + + return shash_find_data(binding_lports, lport_name); +} + +static void +binding_lport_destroy(struct binding_lport *b_lport) +{ + if (!ovs_list_is_empty(&b_lport->list_node)) { + ovs_list_remove(&b_lport->list_node); + } + + free(b_lport->name); + free(b_lport); +} + +static void +binding_lport_delete(struct shash *binding_lports, + struct binding_lport *b_lport) +{ + shash_find_and_delete(binding_lports, b_lport->name); + binding_lport_destroy(b_lport); +} + + +static const struct sbrec_port_binding * +binding_lport_get_parent_pb(struct binding_lport *b_lport) +{ + if (!b_lport) { + return NULL; + } + + if (b_lport->type == LP_VIF) { + return NULL; + } + + struct local_binding *lbinding = b_lport->lbinding; + ovs_assert(lbinding); + + struct binding_lport *parent_b_lport = + local_binding_get_primary_lport(lbinding); + + return parent_b_lport ? parent_b_lport->pb : NULL; +} + +/* This function checks and cleans up the 'b_lport' if it is + * not in the correct state. 
+ * + * If the 'b_lport' type is LP_VIF, then its name and its lbinding->name + * should match. Otherwise this should be cleaned up. + * + * If the 'b_lport' type is LP_CONTAINER, then its parent_port name should + * be the same as its lbinding's name. Otherwise this should be + * cleaned up. + * + * If the 'b_lport' type is LP_VIRTUAL, then its virtual parent name + * should be the same as its lbinding's name. Otherwise this + * should be cleaned up. + * + * If the 'b_lport' type is not LP_VIF, LP_CONTAINER or LP_VIRTUAL, it + * should be cleaned up. This can happen if the CMS changes + * the port binding type. + */ +static struct binding_lport * +binding_lport_check_and_cleanup(struct binding_lport *b_lport, + struct shash *binding_lports) +{ + bool cleanup_blport = false; + + if (!b_lport->lbinding) { + cleanup_blport = true; + goto cleanup; + } + + switch (b_lport->type) { + case LP_VIF: + if (strcmp(b_lport->name, b_lport->lbinding->name)) { + cleanup_blport = true; + } + break; + + case LP_CONTAINER: + if (strcmp(b_lport->pb->parent_port, b_lport->lbinding->name)) { + cleanup_blport = true; + } + break; + + case LP_VIRTUAL: + if (!b_lport->pb->virtual_parent || + strcmp(b_lport->pb->virtual_parent, b_lport->lbinding->name)) { + cleanup_blport = true; + } + break; + + case LP_PATCH: + case LP_LOCALPORT: + case LP_VTEP: + case LP_L2GATEWAY: + case LP_L3GATEWAY: + case LP_CHASSISREDIRECT: + case LP_EXTERNAL: + case LP_LOCALNET: + case LP_REMOTE: + case LP_UNKNOWN: + cleanup_blport = true; + } + +cleanup: + if (cleanup_blport) { + binding_lport_delete(binding_lports, b_lport); + return NULL; + } + + return b_lport; +} diff --git a/controller/binding.h b/controller/binding.h index c9ebef4b1..4fc9ef207 100644 --- a/controller/binding.h +++ b/controller/binding.h @@ -36,6 +36,7 @@ struct sbrec_chassis; struct sbrec_port_binding_table; struct sset; struct sbrec_port_binding; +struct ds; struct binding_ctx_in { struct ovsdb_idl_txn *ovnsb_idl_txn; @@ -56,7 +57,7 @@ 
struct binding_ctx_in { struct binding_ctx_out { struct hmap *local_datapaths; - struct shash *local_bindings; + struct local_binding_data *lbinding_data; /* sset of (potential) local lports. */ struct sset *local_lports; @@ -86,28 +87,16 @@ struct binding_ctx_out { struct hmap *tracked_dp_bindings; }; -enum local_binding_type { - BT_VIF, - BT_CONTAINER, - BT_VIRTUAL +struct local_binding_data { + struct shash bindings; + struct shash lports; }; -struct local_binding { - char *name; - enum local_binding_type type; - const struct ovsrec_interface *iface; - const struct sbrec_port_binding *pb; - - /* shash of 'struct local_binding' representing children. */ - struct shash children; - struct local_binding *parent; -}; +void local_binding_data_init(struct local_binding_data *); +void local_binding_data_destroy(struct local_binding_data *); -static inline struct local_binding * -local_binding_find(struct shash *local_bindings, const char *name) -{ - return shash_find_data(local_bindings, name); -} +const struct sbrec_port_binding *local_binding_get_primary_pb( + struct shash *local_bindings, const char *pb_name); /* Represents a tracked binding logical port. 
*/ struct tracked_binding_lport { @@ -128,8 +117,6 @@ bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_port_binding_table *, const struct sbrec_chassis *); -void local_bindings_init(struct shash *local_bindings); -void local_bindings_destroy(struct shash *local_bindings); bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, struct binding_ctx_out *); bool binding_handle_port_binding_changes(struct binding_ctx_in *, @@ -137,7 +124,8 @@ bool binding_handle_port_binding_changes(struct binding_ctx_in *, void binding_tracked_dp_destroy(struct hmap *tracked_datapaths); void binding_init(void); -void binding_seqno_run(struct shash *local_bindings); -void binding_seqno_install(struct shash *local_bindings); +void binding_seqno_run(struct local_binding_data *lbinding_data); +void binding_seqno_install(struct local_binding_data *lbinding_data); void binding_seqno_flush(void); +void binding_dump_local_bindings(struct local_binding_data *, struct ds *); #endif /* controller/binding.h */ diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml index 51c0c372c..8886df568 100644 --- a/controller/ovn-controller.8.xml +++ b/controller/ovn-controller.8.xml @@ -578,6 +578,28 @@ Displays logical flow cache statistics: enabled/disabled, per cache type entry counts. + +
inc-engine/show-stats
+
+ Display ovn-controller engine counters. For each engine + node the following counters are reported: recompute, compute and abort. +
+ +
inc-engine/clear-stats
+
+ Reset ovn-controller engine counters. +

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 5dd643f52..9102b9903 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -81,6 +81,7 @@ static unixctl_cb_func cluster_state_reset_cmd; static unixctl_cb_func debug_pause_execution; static unixctl_cb_func debug_resume_execution; static unixctl_cb_func debug_status_execution; +static unixctl_cb_func debug_dump_local_bindings; static unixctl_cb_func lflow_cache_flush_cmd; static unixctl_cb_func lflow_cache_show_stats_cmd; static unixctl_cb_func debug_delay_nb_cfg_report; @@ -1182,8 +1183,7 @@ struct ed_type_runtime_data { /* Contains "struct local_datapath" nodes. */ struct hmap local_datapaths; - /* Contains "struct local_binding" nodes. */ - struct shash local_bindings; + struct local_binding_data lbinding_data; /* Contains the name of each logical port resident on the local * hypervisor. These logical ports include the VIFs (and their child @@ -1222,9 +1222,9 @@ struct ed_type_runtime_data { * | | Interface and Port Binding changes store the | * | @tracked_dp_bindings | changed datapaths (datapaths added/removed from | * | | local_datapaths) and changed port bindings | - * | | (added/updated/deleted in 'local_bindings'). | + * | | (added/updated/deleted in 'lbinding_data'). | * | | So any changes to the runtime data - | - * | | local_datapaths and local_bindings is captured | + * | | local_datapaths and lbinding_data is captured | * | | here. | * ------------------------------------------------------------------------ * | | This is a bool which represents if the runtime | @@ -1251,7 +1251,7 @@ struct ed_type_runtime_data { * * --------------------------------------------------------------------- * | local_datapaths | The changes to these runtime data is captured in | - * | local_bindings | the @tracked_dp_bindings indirectly and hence it | + * | lbinding_data | the @tracked_dp_bindings indirectly and hence it | * | local_lport_ids | is not tracked explicitly. 
| * --------------------------------------------------------------------- * | local_iface_ids | This is used internally within the runtime data | @@ -1294,7 +1294,7 @@ en_runtime_data_init(struct engine_node *node OVS_UNUSED, sset_init(&data->active_tunnels); sset_init(&data->egress_ifaces); smap_init(&data->local_iface_ids); - local_bindings_init(&data->local_bindings); + local_binding_data_init(&data->lbinding_data); /* Init the tracked data. */ hmap_init(&data->tracked_dp_bindings); @@ -1322,7 +1322,7 @@ en_runtime_data_cleanup(void *data) free(cur_node); } hmap_destroy(&rt_data->local_datapaths); - local_bindings_destroy(&rt_data->local_bindings); + local_binding_data_destroy(&rt_data->lbinding_data); hmapx_destroy(&rt_data->ct_updated_datapaths); } @@ -1405,7 +1405,7 @@ init_binding_ctx(struct engine_node *node, b_ctx_out->local_lport_ids_changed = false; b_ctx_out->non_vif_ports_changed = false; b_ctx_out->egress_ifaces = &rt_data->egress_ifaces; - b_ctx_out->local_bindings = &rt_data->local_bindings; + b_ctx_out->lbinding_data = &rt_data->lbinding_data; b_ctx_out->local_iface_ids = &rt_data->local_iface_ids; b_ctx_out->tracked_dp_bindings = NULL; b_ctx_out->local_lports_changed = NULL; @@ -1449,7 +1449,7 @@ en_runtime_data_run(struct engine_node *node, void *data) free(cur_node); } hmap_clear(local_datapaths); - local_bindings_destroy(&rt_data->local_bindings); + local_binding_data_destroy(&rt_data->lbinding_data); sset_destroy(local_lports); sset_destroy(local_lport_ids); sset_destroy(active_tunnels); @@ -1460,7 +1460,7 @@ en_runtime_data_run(struct engine_node *node, void *data) sset_init(active_tunnels); sset_init(&rt_data->egress_ifaces); smap_init(&rt_data->local_iface_ids); - local_bindings_init(&rt_data->local_bindings); + local_binding_data_init(&rt_data->lbinding_data); hmapx_clear(&rt_data->ct_updated_datapaths); } @@ -1822,7 +1822,7 @@ static void init_physical_ctx(struct engine_node *node, p_ctx->local_lports = &rt_data->local_lports; 
p_ctx->ct_zones = ct_zones; p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve; - p_ctx->local_bindings = &rt_data->local_bindings; + p_ctx->local_bindings = &rt_data->lbinding_data.bindings; p_ctx->ct_updated_datapaths = &rt_data->ct_updated_datapaths; } @@ -2685,7 +2685,8 @@ main(int argc, char *argv[]) engine_get_internal_data(&en_flow_output); struct ed_type_ct_zones *ct_zones_data = engine_get_internal_data(&en_ct_zones); - struct ed_type_runtime_data *runtime_data = NULL; + struct ed_type_runtime_data *runtime_data = + engine_get_internal_data(&en_runtime_data); ofctrl_init(&flow_output_data->group_table, &flow_output_data->meter_table, @@ -2738,6 +2739,10 @@ main(int argc, char *argv[]) unixctl_command_register("debug/delay-nb-cfg-report", "SECONDS", 1, 1, debug_delay_nb_cfg_report, &delay_nb_cfg_report); + unixctl_command_register("debug/dump-local-bindings", "", 0, 0, + debug_dump_local_bindings, + &runtime_data->lbinding_data); + unsigned int ovs_cond_seqno = UINT_MAX; unsigned int ovnsb_cond_seqno = UINT_MAX; unsigned int ovnsb_expected_cond_seqno = UINT_MAX; @@ -2955,7 +2960,7 @@ main(int argc, char *argv[]) ovnsb_cond_seqno, ovnsb_expected_cond_seqno)); if (runtime_data && ovs_idl_txn && ovnsb_idl_txn) { - binding_seqno_run(&runtime_data->local_bindings); + binding_seqno_run(&runtime_data->lbinding_data); } flow_output_data = engine_get_data(&en_flow_output); @@ -2968,7 +2973,7 @@ main(int argc, char *argv[]) } ofctrl_seqno_run(ofctrl_get_cur_cfg()); if (runtime_data && ovs_idl_txn && ovnsb_idl_txn) { - binding_seqno_install(&runtime_data->local_bindings); + binding_seqno_install(&runtime_data->lbinding_data); } } @@ -3408,3 +3413,13 @@ debug_delay_nb_cfg_report(struct unixctl_conn *conn, int argc OVS_UNUSED, unixctl_command_reply(conn, "no delay for nb_cfg report."); } } + +static void +debug_dump_local_bindings(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *local_bindings) +{ + struct ds binding_data = 
DS_EMPTY_INITIALIZER; + binding_dump_local_bindings(local_bindings, &binding_data); + unixctl_command_reply(conn, ds_cstr(&binding_data)); + ds_destroy(&binding_data); +} diff --git a/controller/physical.c b/controller/physical.c index fa5d0d692..874d1ee27 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -1839,20 +1839,19 @@ physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx, continue; } - const struct local_binding *lb = - local_binding_find(p_ctx->local_bindings, iface_id); - - if (!lb || !lb->pb) { + const struct sbrec_port_binding *lb_pb = + local_binding_get_primary_pb(p_ctx->local_bindings, iface_id); + if (!lb_pb) { continue; } int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0; if (ovsrec_interface_is_deleted(iface_rec)) { - ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid); + ofctrl_remove_flows(flow_table, &lb_pb->header_.uuid); simap_find_and_delete(&localvif_to_ofport, iface_id); } else { if (!ovsrec_interface_is_new(iface_rec)) { - ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid); + ofctrl_remove_flows(flow_table, &lb_pb->header_.uuid); } simap_put(&localvif_to_ofport, iface_id, ofport); @@ -1860,7 +1859,7 @@ physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx, p_ctx->mff_ovn_geneve, p_ctx->ct_zones, p_ctx->active_tunnels, p_ctx->local_datapaths, - lb->pb, p_ctx->chassis, + lb_pb, p_ctx->chassis, flow_table, &ofpacts); } } diff --git a/controller/pinctrl.c b/controller/pinctrl.c index b42288ea5..523a45b9a 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -4240,6 +4240,12 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn, struct shash *nat_addresses) { volatile struct garp_rarp_data *garp_rarp = NULL; + + /* Skip localports as they don't need to be announced */ + if (!strcmp(binding_rec->type, "localport")) { + return; + } + /* Update GARP for NAT IP if it exists. 
Consider port bindings with type * "l3gateway" for logical switch ports attached to gateway routers, and * port bindings with type "patch" for logical switch ports attached to diff --git a/debian/changelog b/debian/changelog index 51f9bcc91..25a04f8ae 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +ovn (21.03.1-1) unstable; urgency=low + + * New upstream version + + -- OVN team Fri, 12 Mar 2021 12:00:00 -0500 + ovn (21.03.0-1) unstable; urgency=low * New upstream version diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index 017176f98..d44b30b30 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -66,6 +66,7 @@ enum mff_log_flags_bits { MLF_LOOKUP_MAC_BIT = 6, MLF_LOOKUP_LB_HAIRPIN_BIT = 7, MLF_LOOKUP_FDB_BIT = 8, + MLF_SKIP_SNAT_FOR_LB_BIT = 9, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -102,6 +103,10 @@ enum mff_log_flags { /* Indicate that the lookup in the fdb table was successful. */ MLF_LOOKUP_FDB = (1 << MLF_LOOKUP_FDB_BIT), + + /* Indicate that a packet must not SNAT in the gateway router when + * load-balancing has taken place. 
*/ + MLF_SKIP_SNAT_FOR_LB = (1 << MLF_SKIP_SNAT_FOR_LB_BIT), }; /* OVN logical fields diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c index 916dbbe39..a6337a1d9 100644 --- a/lib/inc-proc-eng.c +++ b/lib/inc-proc-eng.c @@ -27,6 +27,7 @@ #include "openvswitch/hmap.h" #include "openvswitch/vlog.h" #include "inc-proc-eng.h" +#include "unixctl.h" VLOG_DEFINE_THIS_MODULE(inc_proc_eng); @@ -102,6 +103,40 @@ engine_get_nodes(struct engine_node *node, size_t *n_count) return engine_topo_sort(node, NULL, n_count, &n_size); } +static void +engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED) +{ + for (size_t i = 0; i < engine_n_nodes; i++) { + struct engine_node *node = engine_nodes[i]; + + memset(&node->stats, 0, sizeof node->stats); + } + unixctl_command_reply(conn, NULL); +} + +static void +engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED) +{ + struct ds dump = DS_EMPTY_INITIALIZER; + + for (size_t i = 0; i < engine_n_nodes; i++) { + struct engine_node *node = engine_nodes[i]; + + ds_put_format(&dump, + "Node: %s\n" + "- recompute: %12"PRIu64"\n" + "- compute: %12"PRIu64"\n" + "- abort: %12"PRIu64"\n", + node->name, node->stats.recompute, + node->stats.compute, node->stats.abort); + } + unixctl_command_reply(conn, ds_cstr(&dump)); + + ds_destroy(&dump); +} + void engine_init(struct engine_node *node, struct engine_arg *arg) { @@ -115,6 +150,11 @@ engine_init(struct engine_node *node, struct engine_arg *arg) engine_nodes[i]->data = NULL; } } + + unixctl_command_register("inc-engine/show-stats", "", 0, 0, + engine_dump_stats, NULL); + unixctl_command_register("inc-engine/clear-stats", "", 0, 0, + engine_clear_stats, NULL); } void @@ -288,6 +328,7 @@ engine_recompute(struct engine_node *node, bool forced, bool allowed) /* Run the node handler which might change state. 
*/ node->run(node, node->data); + node->stats.recompute++; } /* Return true if the node could be computed, false otherwise. */ @@ -312,6 +353,8 @@ engine_compute(struct engine_node *node, bool recompute_allowed) } } } + node->stats.compute++; + return true; } @@ -321,6 +364,7 @@ engine_run_node(struct engine_node *node, bool recompute_allowed) if (!node->n_inputs) { /* Run the node handler which might change state. */ node->run(node, node->data); + node->stats.recompute++; return; } @@ -377,6 +421,7 @@ engine_run(bool recompute_allowed) engine_run_node(engine_nodes[i], recompute_allowed); if (engine_nodes[i]->state == EN_ABORTED) { + engine_nodes[i]->stats.abort++; engine_run_aborted = true; return; } @@ -393,6 +438,7 @@ engine_need_run(void) } engine_nodes[i]->run(engine_nodes[i], engine_nodes[i]->data); + engine_nodes[i]->stats.recompute++; VLOG_DBG("input node: %s, state: %s", engine_nodes[i]->name, engine_node_state_name[engine_nodes[i]->state]); if (engine_nodes[i]->state == EN_UPDATED) { diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h index 857234677..7e9f5bb70 100644 --- a/lib/inc-proc-eng.h +++ b/lib/inc-proc-eng.h @@ -107,6 +107,12 @@ enum engine_node_state { EN_STATE_MAX, }; +struct engine_stats { + uint64_t recompute; + uint64_t compute; + uint64_t abort; +}; + struct engine_node { /* A unique name for each node. */ char *name; @@ -154,6 +160,9 @@ struct engine_node { /* Method to clear up tracked data maintained by the engine node in the * engine 'data'. It may be NULL. */ void (*clear_tracked_data)(void *tracked_data); + + /* Engine stats. */ + struct engine_stats stats; }; /* Initialize the data for the engine nodes. 
It calls each node's diff --git a/lib/logical-fields.c b/lib/logical-fields.c index 9d08b44c2..72853013e 100644 --- a/lib/logical-fields.c +++ b/lib/logical-fields.c @@ -121,6 +121,10 @@ ovn_init_symtab(struct shash *symtab) MLF_FORCE_SNAT_FOR_LB_BIT); expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL, flags_str); + snprintf(flags_str, sizeof flags_str, "flags[%d]", + MLF_SKIP_SNAT_FOR_LB_BIT); + expr_symtab_add_subfield(symtab, "flags.skip_snat_for_lb", NULL, + flags_str); /* Connection tracking state. */ expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false, diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index c272cc922..3300f7180 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -2720,7 +2720,11 @@ icmp6 { (and optional port numbers) to load balance to. If the router is configured to force SNAT any load-balanced packets, the above action will be replaced by flags.force_snat_for_lb = 1; - ct_lb(args);. If health check is enabled, then + ct_lb(args);. + If the load balancing rule is configured with skip_snat + set to true, the above action will be replaced by + flags.skip_snat_for_lb = 1; ct_lb(args);. + If health check is enabled, then args will only contain those endpoints whose service monitor status entry in OVN_Southbound db is either online or empty. @@ -2737,6 +2741,9 @@ icmp6 { with an action of ct_dnat;. If the router is configured to force SNAT any load-balanced packets, the above action will be replaced by flags.force_snat_for_lb = 1; ct_dnat;. + If the load balancing rule is configured with skip_snat + set to true, the above action will be replaced by + flags.skip_snat_for_lb = 1; ct_dnat;.
  • @@ -2751,6 +2758,9 @@ icmp6 { to force SNAT any load-balanced packets, the above action will be replaced by flags.force_snat_for_lb = 1; ct_lb(args);. + If the load balancing rule is configured with skip_snat + set to true, the above action will be replaced by + flags.skip_snat_for_lb = 1; ct_lb(args);.
  • @@ -2763,6 +2773,9 @@ icmp6 { If the router is configured to force SNAT any load-balanced packets, the above action will be replaced by flags.force_snat_for_lb = 1; ct_dnat;. + If the load balancing rule is configured with skip_snat + set to true, the above action will be replaced by + flags.skip_snat_for_lb = 1; ct_dnat;.
  • @@ -3795,6 +3808,15 @@ nd_ns {

  • +
  • +

    + If a load balancer configured to skip SNAT has been applied to + the Gateway router pipeline, a priority-120 flow matches + flags.skip_snat_for_lb == 1 && ip with an + action next;. +

    +
  • +
  • If the Gateway router in the OVN Northbound database has been diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 5a2018c2e..4e406c594 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -8573,10 +8573,16 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type, return true; } +enum lb_snat_type { + NO_FORCE_SNAT, + FORCE_SNAT, + SKIP_SNAT, +}; + static void add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, struct ds *match, struct ds *actions, int priority, - bool force_snat_for_lb, struct ovn_lb_vip *lb_vip, + enum lb_snat_type snat_type, struct ovn_lb_vip *lb_vip, const char *proto, struct nbrec_load_balancer *lb, struct shash *meter_groups, struct sset *nat_entries) { @@ -8585,9 +8591,10 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, /* A match and actions for new connections. */ char *new_match = xasprintf("ct.new && %s", ds_cstr(match)); - if (force_snat_for_lb) { - char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s", - ds_cstr(actions)); + if (snat_type == FORCE_SNAT || snat_type == SKIP_SNAT) { + char *new_actions = xasprintf("flags.%s_snat_for_lb = 1; %s", + snat_type == SKIP_SNAT ? "skip" : "force", + ds_cstr(actions)); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority, new_match, new_actions, &lb->header_); free(new_actions); @@ -8598,11 +8605,12 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, /* A match and actions for established connections. */ char *est_match = xasprintf("ct.est && %s", ds_cstr(match)); - if (force_snat_for_lb) { + if (snat_type == FORCE_SNAT || snat_type == SKIP_SNAT) { + char *est_actions = xasprintf("flags.%s_snat_for_lb = 1; ct_dnat;", + snat_type == SKIP_SNAT ? 
"skip" : "force"); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority, - est_match, - "flags.force_snat_for_lb = 1; ct_dnat;", - &lb->header_); + est_match, est_actions, &lb->header_); + free(est_actions); } else { ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority, est_match, "ct_dnat;", &lb->header_); @@ -8675,11 +8683,13 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, ds_put_format(&undnat_match, ") && outport == %s && " "is_chassis_resident(%s)", od->l3dgw_port->json_key, od->l3redirect_port->json_key); - if (force_snat_for_lb) { + if (snat_type == FORCE_SNAT || snat_type == SKIP_SNAT) { + char *action = xasprintf("flags.%s_snat_for_lb = 1; ct_dnat;", + snat_type == SKIP_SNAT ? "skip" : "force"); ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 120, - ds_cstr(&undnat_match), - "flags.force_snat_for_lb = 1; ct_dnat;", + ds_cstr(&undnat_match), action, &lb->header_); + free(action); } else { ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 120, ds_cstr(&undnat_match), "ct_dnat;", @@ -8689,6 +8699,105 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, ds_destroy(&undnat_match); } +static void +build_lrouter_lb_flows(struct hmap *lflows, struct ovn_datapath *od, + struct hmap *lbs, struct shash *meter_groups, + struct sset *nat_entries, struct ds *match, + struct ds *actions) +{ + /* A set to hold all ips that need defragmentation and tracking. 
*/ + struct sset all_ips = SSET_INITIALIZER(&all_ips); + bool lb_force_snat_ip = + !lport_addresses_is_empty(&od->lb_force_snat_addrs); + + for (int i = 0; i < od->nbr->n_load_balancer; i++) { + struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; + struct ovn_northd_lb *lb = + ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); + ovs_assert(lb); + + bool lb_skip_snat = smap_get_bool(&nb_lb->options, "skip_snat", false); + if (lb_skip_snat) { + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, + "flags.skip_snat_for_lb == 1 && ip", "next;"); + } + + for (size_t j = 0; j < lb->n_vips; j++) { + struct ovn_lb_vip *lb_vip = &lb->vips[j]; + struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; + ds_clear(actions); + build_lb_vip_actions(lb_vip, lb_vip_nb, actions, + lb->selection_fields, false); + + if (!sset_contains(&all_ips, lb_vip->vip_str)) { + sset_add(&all_ips, lb_vip->vip_str); + /* If there are any load balancing rules, we should send + * the packet to conntrack for defragmentation and + * tracking. This helps with two things. + * + * 1. With tracking, we can send only new connections to + * pick a DNAT ip address from a group. + * 2. If there are L4 ports in load balancing rules, we + * need the defragmentation to match on L4 ports. */ + ds_clear(match); + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { + ds_put_format(match, "ip && ip4.dst == %s", + lb_vip->vip_str); + } else { + ds_put_format(match, "ip && ip6.dst == %s", + lb_vip->vip_str); + } + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, + 100, ds_cstr(match), "ct_next;", + &nb_lb->header_); + } + + /* Higher priority rules are added for load-balancing in DNAT + * table. For every match (on a VIP[:port]), we add two flows + * via add_router_lb_flow(). One flow is for specific matching + * on ct.new with an action of "ct_lb($targets);". The other + * flow is for ct.est with an action of "ct_dnat;". 
*/ + ds_clear(match); + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { + ds_put_format(match, "ip && ip4.dst == %s", + lb_vip->vip_str); + } else { + ds_put_format(match, "ip && ip6.dst == %s", + lb_vip->vip_str); + } + + int prio = 110; + bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); + bool is_sctp = nullable_string_is_equal(nb_lb->protocol, + "sctp"); + const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp"; + + if (lb_vip->vip_port) { + ds_put_format(match, " && %s && %s.dst == %d", proto, + proto, lb_vip->vip_port); + prio = 120; + } + + if (od->l3redirect_port && + (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { + ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } + + enum lb_snat_type snat_type = NO_FORCE_SNAT; + if (lb_skip_snat) { + snat_type = SKIP_SNAT; + } else if (lb_force_snat_ip || od->lb_force_snat_router_ip) { + snat_type = FORCE_SNAT; + } + add_router_lb_flow(lflows, od, match, actions, prio, + snat_type, lb_vip, proto, nb_lb, + meter_groups, nat_entries); + } + } + sset_destroy(&all_ips); +} + #define ND_RA_MAX_INTERVAL_MAX 1800 #define ND_RA_MAX_INTERVAL_MIN 4 @@ -11002,668 +11111,643 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, } } -/* NAT, Defrag and load balancing. */ static void -build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, - struct hmap *lflows, - struct shash *meter_groups, - struct hmap *lbs, - struct ds *match, struct ds *actions) +build_lrouter_in_unsnat_flow(struct hmap *lflows, struct ovn_datapath *od, + const struct nbrec_nat *nat, struct ds *match, + struct ds *actions, bool distributed, bool is_v6) { - if (od->nbr) { + /* Ingress UNSNAT table: It is for already established connections' + * reverse traffic. i.e., SNAT has already been done in egress + * pipeline and now the packet has entered the ingress pipeline as + * part of a reply. We undo the SNAT here. + * + * Undoing SNAT has to happen before DNAT processing. 
This is + * because when the packet was DNATed in ingress pipeline, it did + * not know about the possibility of eventual additional SNAT in + * egress pipeline. */ + if (strcmp(nat->type, "snat") && strcmp(nat->type, "dnat_and_snat")) { + return; + } - /* Packets are allowed by default. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); - - /* Send the IPv6 NS packets to next table. When ovn-controller - * generates IPv6 NS (for the action - nd_ns{}), the injected - * packet would go through conntrack - which is not required. */ - ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); - - /* NAT rules are only valid on Gateway routers and routers with - * l3dgw_port (router has a port with gateway chassis - * specified). */ - if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { - return; + bool stateless = lrouter_nat_is_stateless(nat); + if (!od->l3dgw_port) { + /* Gateway router. */ + ds_clear(match); + ds_clear(actions); + ds_put_format(match, "ip && ip%s.dst == %s", + is_v6 ? "6" : "4", nat->external_ip); + if (!strcmp(nat->type, "dnat_and_snat") && stateless) { + ds_put_format(actions, "ip%s.dst=%s; next;", + is_v6 ? "6" : "4", nat->logical_ip); + } else { + ds_put_cstr(actions, "ct_snat;"); } - struct sset nat_entries = SSET_INITIALIZER(&nat_entries); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, + 90, ds_cstr(match), ds_cstr(actions), + &nat->header_); + } else { + /* Distributed router. 
*/ - bool dnat_force_snat_ip = - !lport_addresses_is_empty(&od->dnat_force_snat_addrs); - bool lb_force_snat_ip = - !lport_addresses_is_empty(&od->lb_force_snat_addrs); + /* Traffic received on l3dgw_port is subject to NAT. */ + ds_clear(match); + ds_clear(actions); + ds_put_format(match, "ip && ip%s.dst == %s && inport == %s", + is_v6 ? "6" : "4", nat->external_ip, + od->l3dgw_port->json_key); + if (!distributed && od->l3redirect_port) { + /* Flows for NAT rules that are centralized are only + * programmed on the gateway chassis. */ + ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } - for (int i = 0; i < od->nbr->n_nat; i++) { - const struct nbrec_nat *nat; + if (!strcmp(nat->type, "dnat_and_snat") && stateless) { + ds_put_format(actions, "ip%s.dst=%s; next;", + is_v6 ? "6" : "4", nat->logical_ip); + } else { + ds_put_cstr(actions, "ct_snat;"); + } - nat = od->nbr->nat[i]; + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, + 100, ds_cstr(match), ds_cstr(actions), + &nat->header_); + } +} - ovs_be32 ip, mask; - struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; - bool is_v6 = false; - bool stateless = lrouter_nat_is_stateless(nat); - struct nbrec_address_set *allowed_ext_ips = - nat->allowed_ext_ips; - struct nbrec_address_set *exempted_ext_ips = - nat->exempted_ext_ips; +static void +build_lrouter_in_dnat_flow(struct hmap *lflows, struct ovn_datapath *od, + const struct nbrec_nat *nat, struct ds *match, + struct ds *actions, bool distributed, + ovs_be32 mask, bool is_v6) +{ + /* Ingress DNAT table: Packets enter the pipeline with destination + * IP address that needs to be DNATted from a external IP address + * to a logical IP address. 
*/ + if (!strcmp(nat->type, "dnat") || !strcmp(nat->type, "dnat_and_snat")) { + bool stateless = lrouter_nat_is_stateless(nat); - if (allowed_ext_ips && exempted_ext_ips) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since " - "both allowed and exempt external ips set", - UUID_ARGS(&(nat->header_.uuid))); - continue; + if (!od->l3dgw_port) { + /* Gateway router. */ + /* Packet when it goes from the initiator to destination. + * We need to set flags.loopback because the router can + * send the packet back through the same interface. */ + ds_clear(match); + ds_put_format(match, "ip && ip%s.dst == %s", + is_v6 ? "6" : "4", nat->external_ip); + ds_clear(actions); + if (nat->allowed_ext_ips || nat->exempted_ext_ips) { + lrouter_nat_add_ext_ip_match(od, lflows, match, nat, + is_v6, true, mask); } - char *error = ip_parse_masked(nat->external_ip, &ip, &mask); - if (error || mask != OVS_BE32_MAX) { - free(error); - error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); - if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) { - /* Invalid for both IPv4 and IPv6 */ - static struct vlog_rate_limit rl = - VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_WARN_RL(&rl, "bad external ip %s for nat", - nat->external_ip); - free(error); - continue; - } - /* It was an invalid IPv4 address, but valid IPv6. - * Treat the rest of the handling of this NAT rule - * as IPv6. */ - is_v6 = true; - } - - /* Check the validity of nat->logical_ip. 'logical_ip' can - * be a subnet when the type is "snat". 
*/ - int cidr_bits; - if (is_v6) { - error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6); - cidr_bits = ipv6_count_cidr_bits(&mask_v6); - } else { - error = ip_parse_masked(nat->logical_ip, &ip, &mask); - cidr_bits = ip_count_cidr_bits(mask); + if (!lport_addresses_is_empty(&od->dnat_force_snat_addrs)) { + /* Indicate to the future tables that a DNAT has taken + * place and a force SNAT needs to be done in the + * Egress SNAT table. */ + ds_put_format(actions, "flags.force_snat_for_dnat = 1; "); } - if (!strcmp(nat->type, "snat")) { - if (error) { - /* Invalid for both IPv4 and IPv6 */ - static struct vlog_rate_limit rl = - VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " - "in router "UUID_FMT"", - nat->logical_ip, UUID_ARGS(&od->key)); - free(error); - continue; - } + + if (!strcmp(nat->type, "dnat_and_snat") && stateless) { + ds_put_format(actions, "flags.loopback = 1; " + "ip%s.dst=%s; next;", + is_v6 ? "6" : "4", nat->logical_ip); } else { - if (error || (!is_v6 && mask != OVS_BE32_MAX) - || (is_v6 && memcmp(&mask_v6, &v6_exact, - sizeof mask_v6))) { - /* Invalid for both IPv4 and IPv6 */ - static struct vlog_rate_limit rl = - VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " - ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); - free(error); - continue; + ds_put_format(actions, "flags.loopback = 1; ct_dnat(%s", + nat->logical_ip); + + if (nat->external_port_range[0]) { + ds_put_format(actions, ",%s", nat->external_port_range); } + ds_put_format(actions, ");"); } - /* For distributed router NAT, determine whether this NAT rule - * satisfies the conditions for distributed NAT processing. 
*/ - bool distributed = false; - struct eth_addr mac; - if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && - nat->logical_port && nat->external_mac) { - if (eth_addr_from_string(nat->external_mac, &mac)) { - distributed = true; - } else { - static struct vlog_rate_limit rl = - VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " - ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); - continue; + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, + ds_cstr(match), ds_cstr(actions), + &nat->header_); + } else { + /* Distributed router. */ + + /* Traffic received on l3dgw_port is subject to NAT. */ + ds_clear(match); + ds_put_format(match, "ip && ip%s.dst == %s && inport == %s", + is_v6 ? "6" : "4", nat->external_ip, + od->l3dgw_port->json_key); + if (!distributed && od->l3redirect_port) { + /* Flows for NAT rules that are centralized are only + * programmed on the gateway chassis. */ + ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } + ds_clear(actions); + if (nat->allowed_ext_ips || nat->exempted_ext_ips) { + lrouter_nat_add_ext_ip_match(od, lflows, match, nat, + is_v6, true, mask); + } + + if (!strcmp(nat->type, "dnat_and_snat") && stateless) { + ds_put_format(actions, "ip%s.dst=%s; next;", + is_v6 ? "6" : "4", nat->logical_ip); + } else { + ds_put_format(actions, "ct_dnat(%s", nat->logical_ip); + if (nat->external_port_range[0]) { + ds_put_format(actions, ",%s", nat->external_port_range); } + ds_put_format(actions, ");"); } - /* Ingress UNSNAT table: It is for already established connections' - * reverse traffic. i.e., SNAT has already been done in egress - * pipeline and now the packet has entered the ingress pipeline as - * part of a reply. We undo the SNAT here. - * - * Undoing SNAT has to happen before DNAT processing. 
This is - * because when the packet was DNATed in ingress pipeline, it did - * not know about the possibility of eventual additional SNAT in - * egress pipeline. */ - if (!strcmp(nat->type, "snat") - || !strcmp(nat->type, "dnat_and_snat")) { - if (!od->l3dgw_port) { - /* Gateway router. */ - ds_clear(match); - ds_clear(actions); - ds_put_format(match, "ip && ip%s.dst == %s", - is_v6 ? "6" : "4", - nat->external_ip); - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "ip%s.dst=%s; next;", - is_v6 ? "6" : "4", nat->logical_ip); - } else { - ds_put_cstr(actions, "ct_snat;"); - } + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, + ds_cstr(match), ds_cstr(actions), + &nat->header_); + } + } +} - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, - 90, ds_cstr(match), - ds_cstr(actions), - &nat->header_); - } else { - /* Distributed router. */ +static void +build_lrouter_out_undnat_flow(struct hmap *lflows, struct ovn_datapath *od, + const struct nbrec_nat *nat, struct ds *match, + struct ds *actions, bool distributed, + struct eth_addr mac, bool is_v6) +{ + /* Egress UNDNAT table: It is for already established connections' + * reverse traffic. i.e., DNAT has already been done in ingress + * pipeline and now the packet has entered the egress pipeline as + * part of a reply. We undo the DNAT here. + * + * Note that this only applies for NAT on a distributed router. + * Undo DNAT on a gateway router is done in the ingress DNAT + * pipeline stage. */ + if (!od->l3dgw_port || + (strcmp(nat->type, "dnat") && strcmp(nat->type, "dnat_and_snat"))) { + return; + } - /* Traffic received on l3dgw_port is subject to NAT. */ - ds_clear(match); - ds_clear(actions); - ds_put_format(match, "ip && ip%s.dst == %s" - " && inport == %s", - is_v6 ? "6" : "4", - nat->external_ip, - od->l3dgw_port->json_key); - if (!distributed && od->l3redirect_port) { - /* Flows for NAT rules that are centralized are only - * programmed on the gateway chassis. 
*/ - ds_put_format(match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } + ds_clear(match); + ds_put_format(match, "ip && ip%s.src == %s && outport == %s", + is_v6 ? "6" : "4", nat->logical_ip, + od->l3dgw_port->json_key); + if (!distributed && od->l3redirect_port) { + /* Flows for NAT rules that are centralized are only + * programmed on the gateway chassis. */ + ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } + ds_clear(actions); + if (distributed) { + ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", + ETH_ADDR_ARGS(mac)); + } - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "ip%s.dst=%s; next;", - is_v6 ? "6" : "4", nat->logical_ip); - } else { - ds_put_cstr(actions, "ct_snat;"); - } + if (!strcmp(nat->type, "dnat_and_snat") && + lrouter_nat_is_stateless(nat)) { + ds_put_format(actions, "ip%s.src=%s; next;", + is_v6 ? "6" : "4", nat->external_ip); + } else { + ds_put_format(actions, "ct_dnat;"); + } - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, - 100, - ds_cstr(match), ds_cstr(actions), - &nat->header_); - } - } + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, + ds_cstr(match), ds_cstr(actions), + &nat->header_); +} - /* Ingress DNAT table: Packets enter the pipeline with destination - * IP address that needs to be DNATted from a external IP address - * to a logical IP address. */ - if (!strcmp(nat->type, "dnat") - || !strcmp(nat->type, "dnat_and_snat")) { - if (!od->l3dgw_port) { - /* Gateway router. */ - /* Packet when it goes from the initiator to destination. - * We need to set flags.loopback because the router can - * send the packet back through the same interface. */ - ds_clear(match); - ds_put_format(match, "ip && ip%s.dst == %s", - is_v6 ? 
"6" : "4", - nat->external_ip); - ds_clear(actions); - if (allowed_ext_ips || exempted_ext_ips) { - lrouter_nat_add_ext_ip_match(od, lflows, match, nat, - is_v6, true, mask); - } +static void +build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od, + const struct nbrec_nat *nat, struct ds *match, + struct ds *actions, bool distributed, + struct eth_addr mac, ovs_be32 mask, + int cidr_bits, bool is_v6) +{ + /* Egress SNAT table: Packets enter the egress pipeline with + * source ip address that needs to be SNATted to a external ip + * address. */ + if (strcmp(nat->type, "snat") && strcmp(nat->type, "dnat_and_snat")) { + return; + } - if (dnat_force_snat_ip) { - /* Indicate to the future tables that a DNAT has taken - * place and a force SNAT needs to be done in the - * Egress SNAT table. */ - ds_put_format(actions, - "flags.force_snat_for_dnat = 1; "); - } + bool stateless = lrouter_nat_is_stateless(nat); + if (!od->l3dgw_port) { + /* Gateway router. */ + ds_clear(match); + ds_put_format(match, "ip && ip%s.src == %s", + is_v6 ? "6" : "4", nat->logical_ip); + ds_clear(actions); - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "flags.loopback = 1; " - "ip%s.dst=%s; next;", - is_v6 ? "6" : "4", nat->logical_ip); - } else { - ds_put_format(actions, "flags.loopback = 1; " - "ct_dnat(%s", nat->logical_ip); + if (nat->allowed_ext_ips || nat->exempted_ext_ips) { + lrouter_nat_add_ext_ip_match(od, lflows, match, nat, + is_v6, false, mask); + } - if (nat->external_port_range[0]) { - ds_put_format(actions, ",%s", - nat->external_port_range); - } - ds_put_format(actions, ");"); - } + if (!strcmp(nat->type, "dnat_and_snat") && stateless) { + ds_put_format(actions, "ip%s.src=%s; next;", + is_v6 ? 
"6" : "4", nat->external_ip); + } else { + ds_put_format(actions, "ct_snat(%s", nat->external_ip); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, - ds_cstr(match), ds_cstr(actions), - &nat->header_); - } else { - /* Distributed router. */ + if (nat->external_port_range[0]) { + ds_put_format(actions, ",%s", + nat->external_port_range); + } + ds_put_format(actions, ");"); + } - /* Traffic received on l3dgw_port is subject to NAT. */ - ds_clear(match); - ds_put_format(match, "ip && ip%s.dst == %s" - " && inport == %s", - is_v6 ? "6" : "4", - nat->external_ip, - od->l3dgw_port->json_key); - if (!distributed && od->l3redirect_port) { - /* Flows for NAT rules that are centralized are only - * programmed on the gateway chassis. */ - ds_put_format(match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } - ds_clear(actions); - if (allowed_ext_ips || exempted_ext_ips) { - lrouter_nat_add_ext_ip_match(od, lflows, match, nat, - is_v6, true, mask); - } + /* The priority here is calculated such that the + * nat->logical_ip with the longest mask gets a higher + * priority. */ + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, + cidr_bits + 1, ds_cstr(match), + ds_cstr(actions), &nat->header_); + } else { + uint16_t priority = cidr_bits + 1; - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "ip%s.dst=%s; next;", - is_v6 ? "6" : "4", nat->logical_ip); - } else { - ds_put_format(actions, "ct_dnat(%s", nat->logical_ip); - if (nat->external_port_range[0]) { - ds_put_format(actions, ",%s", - nat->external_port_range); - } - ds_put_format(actions, ");"); - } + /* Distributed router. */ + ds_clear(match); + ds_put_format(match, "ip && ip%s.src == %s && outport == %s", + is_v6 ? "6" : "4", nat->logical_ip, + od->l3dgw_port->json_key); + if (!distributed && od->l3redirect_port) { + /* Flows for NAT rules that are centralized are only + * programmed on the gateway chassis. 
*/ + priority += 128; + ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } + ds_clear(actions); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, - ds_cstr(match), ds_cstr(actions), - &nat->header_); - } - } + if (nat->allowed_ext_ips || nat->exempted_ext_ips) { + lrouter_nat_add_ext_ip_match(od, lflows, match, nat, + is_v6, false, mask); + } - /* ARP resolve for NAT IPs. */ - if (od->l3dgw_port) { - if (!strcmp(nat->type, "snat")) { - ds_clear(match); - ds_put_format( - match, "inport == %s && %s == %s", - od->l3dgw_port->json_key, - is_v6 ? "ip6.src" : "ip4.src", nat->external_ip); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, - 120, ds_cstr(match), "next;", - &nat->header_); - } + if (distributed) { + ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", + ETH_ADDR_ARGS(mac)); + } - if (!sset_contains(&nat_entries, nat->external_ip)) { - ds_clear(match); - ds_put_format( - match, "outport == %s && %s == %s", - od->l3dgw_port->json_key, - is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4, + if (!strcmp(nat->type, "dnat_and_snat") && stateless) { + ds_put_format(actions, "ip%s.src=%s; next;", + is_v6 ? "6" : "4", nat->external_ip); + } else { + ds_put_format(actions, "ct_snat(%s", nat->external_ip); - ds_clear(actions); - ds_put_format( - actions, "eth.dst = %s; next;", - distributed ? nat->external_mac : - od->l3dgw_port->lrp_networks.ea_s); - ovn_lflow_add_with_hint(lflows, od, - S_ROUTER_IN_ARP_RESOLVE, - 100, ds_cstr(match), - ds_cstr(actions), - &nat->header_); - sset_add(&nat_entries, nat->external_ip); - } - } else { - /* Add the NAT external_ip to the nat_entries even for - * gateway routers. 
This is required for adding load balancer - * flows.*/ - sset_add(&nat_entries, nat->external_ip); + if (nat->external_port_range[0]) { + ds_put_format(actions, ",%s", nat->external_port_range); } + ds_put_format(actions, ");"); + } - /* Egress UNDNAT table: It is for already established connections' - * reverse traffic. i.e., DNAT has already been done in ingress - * pipeline and now the packet has entered the egress pipeline as - * part of a reply. We undo the DNAT here. - * - * Note that this only applies for NAT on a distributed router. - * Undo DNAT on a gateway router is done in the ingress DNAT - * pipeline stage. */ - if (od->l3dgw_port && (!strcmp(nat->type, "dnat") - || !strcmp(nat->type, "dnat_and_snat"))) { - ds_clear(match); - ds_put_format(match, "ip && ip%s.src == %s" - " && outport == %s", - is_v6 ? "6" : "4", - nat->logical_ip, - od->l3dgw_port->json_key); - if (!distributed && od->l3redirect_port) { - /* Flows for NAT rules that are centralized are only - * programmed on the gateway chassis. */ - ds_put_format(match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } - ds_clear(actions); - if (distributed) { - ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", - ETH_ADDR_ARGS(mac)); - } + /* The priority here is calculated such that the + * nat->logical_ip with the longest mask gets a higher + * priority. */ + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, + priority, ds_cstr(match), + ds_cstr(actions), &nat->header_); + } +} - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "ip%s.src=%s; next;", - is_v6 ? 
"6" : "4", nat->external_ip); - } else { - ds_put_format(actions, "ct_dnat;"); - } +static void +build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, + const struct nbrec_nat *nat, struct ds *match, + struct ds *actions, struct eth_addr mac, + bool distributed, bool is_v6) +{ + if (od->l3dgw_port && !strcmp(nat->type, "snat")) { + ds_clear(match); + ds_put_format( + match, "inport == %s && %s == %s", + od->l3dgw_port->json_key, + is_v6 ? "ip6.src" : "ip4.src", nat->external_ip); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, + 120, ds_cstr(match), "next;", + &nat->header_); + } + /* Logical router ingress table 0: + * For NAT on a distributed router, add rules allowing + * ingress traffic with eth.dst matching nat->external_mac + * on the l3dgw_port instance where nat->logical_port is + * resident. */ + if (distributed) { + /* Store the ethernet address of the port receiving the packet. + * This will save us from having to match on inport further + * down in the pipeline. + */ + ds_clear(actions); + ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", + od->l3dgw_port->lrp_networks.ea_s); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, - ds_cstr(match), ds_cstr(actions), - &nat->header_); - } + ds_clear(match); + ds_put_format(match, + "eth.dst == "ETH_ADDR_FMT" && inport == %s" + " && is_chassis_resident(\"%s\")", + ETH_ADDR_ARGS(mac), + od->l3dgw_port->json_key, + nat->logical_port); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, + ds_cstr(match), ds_cstr(actions), + &nat->header_); + } +} - /* Egress SNAT table: Packets enter the egress pipeline with - * source ip address that needs to be SNATted to a external ip - * address. */ - if (!strcmp(nat->type, "snat") - || !strcmp(nat->type, "dnat_and_snat")) { - if (!od->l3dgw_port) { - /* Gateway router. */ - ds_clear(match); - ds_put_format(match, "ip && ip%s.src == %s", - is_v6 ? 
"6" : "4", - nat->logical_ip); - ds_clear(actions); +static int +lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat *nat, + ovs_be32 *mask, bool *is_v6, int *cidr_bits, + struct eth_addr *mac, bool *distributed) +{ + struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; + ovs_be32 ip; - if (allowed_ext_ips || exempted_ext_ips) { - lrouter_nat_add_ext_ip_match(od, lflows, match, nat, - is_v6, false, mask); - } + if (nat->allowed_ext_ips && nat->exempted_ext_ips) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since " + "both allowed and exempt external ips set", + UUID_ARGS(&(nat->header_.uuid))); + return -EINVAL; + } - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "ip%s.src=%s; next;", - is_v6 ? "6" : "4", nat->external_ip); - } else { - ds_put_format(actions, "ct_snat(%s", - nat->external_ip); + char *error = ip_parse_masked(nat->external_ip, &ip, mask); + *is_v6 = false; - if (nat->external_port_range[0]) { - ds_put_format(actions, ",%s", - nat->external_port_range); - } - ds_put_format(actions, ");"); - } + if (error || *mask != OVS_BE32_MAX) { + free(error); + error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); + if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) { + /* Invalid for both IPv4 and IPv6 */ + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad external ip %s for nat", + nat->external_ip); + free(error); + return -EINVAL; + } + /* It was an invalid IPv4 address, but valid IPv6. + * Treat the rest of the handling of this NAT rule + * as IPv6. */ + *is_v6 = true; + } - /* The priority here is calculated such that the - * nat->logical_ip with the longest mask gets a higher - * priority. 
*/ - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, - cidr_bits + 1, - ds_cstr(match), ds_cstr(actions), - &nat->header_); - } else { - uint16_t priority = cidr_bits + 1; + /* Check the validity of nat->logical_ip. 'logical_ip' can + * be a subnet when the type is "snat". */ + if (*is_v6) { + error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6); + *cidr_bits = ipv6_count_cidr_bits(&mask_v6); + } else { + error = ip_parse_masked(nat->logical_ip, &ip, mask); + *cidr_bits = ip_count_cidr_bits(*mask); + } + if (!strcmp(nat->type, "snat")) { + if (error) { + /* Invalid for both IPv4 and IPv6 */ + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " + "in router "UUID_FMT"", + nat->logical_ip, UUID_ARGS(&od->key)); + free(error); + return -EINVAL; + } + } else { + if (error || (*is_v6 == false && *mask != OVS_BE32_MAX) + || (*is_v6 && memcmp(&mask_v6, &v6_exact, + sizeof mask_v6))) { + /* Invalid for both IPv4 and IPv6 */ + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " + ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); + free(error); + return -EINVAL; + } + } - /* Distributed router. */ - ds_clear(match); - ds_put_format(match, "ip && ip%s.src == %s" - " && outport == %s", - is_v6 ? "6" : "4", - nat->logical_ip, - od->l3dgw_port->json_key); - if (!distributed && od->l3redirect_port) { - /* Flows for NAT rules that are centralized are only - * programmed on the gateway chassis. */ - priority += 128; - ds_put_format(match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } - ds_clear(actions); + /* For distributed router NAT, determine whether this NAT rule + * satisfies the conditions for distributed NAT processing. 
*/ + *distributed = false; + if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && + nat->logical_port && nat->external_mac) { + if (eth_addr_from_string(nat->external_mac, mac)) { + *distributed = true; + } else { + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " + ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); + return -EINVAL; + } + } - if (allowed_ext_ips || exempted_ext_ips) { - lrouter_nat_add_ext_ip_match(od, lflows, match, nat, - is_v6, false, mask); - } + return 0; +} - if (distributed) { - ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", - ETH_ADDR_ARGS(mac)); - } +/* NAT, Defrag and load balancing. */ +static void +build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, + struct hmap *lflows, + struct shash *meter_groups, + struct hmap *lbs, + struct ds *match, struct ds *actions) +{ + if (!od->nbr) { + return; + } - if (!strcmp(nat->type, "dnat_and_snat") && stateless) { - ds_put_format(actions, "ip%s.src=%s; next;", - is_v6 ? "6" : "4", nat->external_ip); - } else { - ds_put_format(actions, "ct_snat(%s", - nat->external_ip); - if (nat->external_port_range[0]) { - ds_put_format(actions, ",%s", - nat->external_port_range); - } - ds_put_format(actions, ");"); - } + /* Packets are allowed by default. */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); + + /* Send the IPv6 NS packets to next table. When ovn-controller + * generates IPv6 NS (for the action - nd_ns{}), the injected + * packet would go through conntrack - which is not required. 
*/ + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); + + /* NAT rules are only valid on Gateway routers and routers with + * l3dgw_port (router has a port with gateway chassis + * specified). */ + if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { + return; + } - /* The priority here is calculated such that the - * nat->logical_ip with the longest mask gets a higher - * priority. */ - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, - priority, ds_cstr(match), - ds_cstr(actions), - &nat->header_); - } - } + struct sset nat_entries = SSET_INITIALIZER(&nat_entries); - /* Logical router ingress table 0: - * For NAT on a distributed router, add rules allowing - * ingress traffic with eth.dst matching nat->external_mac - * on the l3dgw_port instance where nat->logical_port is - * resident. */ - if (distributed) { - /* Store the ethernet address of the port receiving the packet. - * This will save us from having to match on inport further - * down in the pipeline. - */ - ds_clear(actions); - ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", - od->l3dgw_port->lrp_networks.ea_s); + bool dnat_force_snat_ip = + !lport_addresses_is_empty(&od->dnat_force_snat_addrs); + bool lb_force_snat_ip = + !lport_addresses_is_empty(&od->lb_force_snat_addrs); - ds_clear(match); - ds_put_format(match, - "eth.dst == "ETH_ADDR_FMT" && inport == %s" - " && is_chassis_resident(\"%s\")", - ETH_ADDR_ARGS(mac), - od->l3dgw_port->json_key, - nat->logical_port); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, - ds_cstr(match), ds_cstr(actions), - &nat->header_); - } + for (int i = 0; i < od->nbr->n_nat; i++) { + const struct nbrec_nat *nat = nat = od->nbr->nat[i]; + struct eth_addr mac = eth_addr_broadcast; + bool is_v6, distributed; + ovs_be32 mask; + int cidr_bits; - /* Ingress Gateway Redirect Table: For NAT on a distributed - * router, add flows that are specific to a NAT rule. 
These - * flows indicate the presence of an applicable NAT rule that - * can be applied in a distributed manner. - * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to - * NAT external IP and NAT external mac so the ARP request - * generated in the following stage is sent out with proper IP/MAC - * src addresses. - */ - if (distributed) { - ds_clear(match); - ds_clear(actions); - ds_put_format(match, - "ip%s.src == %s && outport == %s && " - "is_chassis_resident(\"%s\")", - is_v6 ? "6" : "4", nat->logical_ip, - od->l3dgw_port->json_key, nat->logical_port); - ds_put_format(actions, "eth.src = %s; %s = %s; next;", - nat->external_mac, - is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4, - nat->external_ip); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, - 100, ds_cstr(match), - ds_cstr(actions), &nat->header_); - } + if (lrouter_check_nat_entry(od, nat, &mask, &is_v6, &cidr_bits, + &mac, &distributed) < 0) { + continue; + } - /* Egress Loopback table: For NAT on a distributed router. - * If packets in the egress pipeline on the distributed - * gateway port have ip.dst matching a NAT external IP, then - * loop a clone of the packet back to the beginning of the - * ingress pipeline with inport = outport. */ - if (od->l3dgw_port) { - /* Distributed router. */ - ds_clear(match); - ds_put_format(match, "ip%s.dst == %s && outport == %s", - is_v6 ? "6" : "4", - nat->external_ip, - od->l3dgw_port->json_key); - if (!distributed) { - ds_put_format(match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } else { - ds_put_format(match, " && is_chassis_resident(\"%s\")", - nat->logical_port); - } + /* S_ROUTER_IN_UNSNAT */ + build_lrouter_in_unsnat_flow(lflows, od, nat, match, actions, distributed, + is_v6); + /* S_ROUTER_IN_DNAT */ + build_lrouter_in_dnat_flow(lflows, od, nat, match, actions, distributed, + mask, is_v6); + /* ARP resolve for NAT IPs. 
*/ + if (od->l3dgw_port) { + if (!sset_contains(&nat_entries, nat->external_ip)) { + ds_clear(match); + ds_put_format( + match, "outport == %s && %s == %s", + od->l3dgw_port->json_key, + is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4, + nat->external_ip); ds_clear(actions); - ds_put_format(actions, - "clone { ct_clear; " - "inport = outport; outport = \"\"; " - "flags = 0; flags.loopback = 1; "); - for (int j = 0; j < MFF_N_LOG_REGS; j++) { - ds_put_format(actions, "reg%d = 0; ", j); - } - ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; " - "next(pipeline=ingress, table=%d); };", - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, - ds_cstr(match), ds_cstr(actions), + ds_put_format( + actions, "eth.dst = %s; next;", + distributed ? nat->external_mac : + od->l3dgw_port->lrp_networks.ea_s); + ovn_lflow_add_with_hint(lflows, od, + S_ROUTER_IN_ARP_RESOLVE, + 100, ds_cstr(match), + ds_cstr(actions), &nat->header_); + sset_add(&nat_entries, nat->external_ip); } - } - - /* Handle force SNAT options set in the gateway router. */ - if (!od->l3dgw_port) { - if (dnat_force_snat_ip) { - if (od->dnat_force_snat_addrs.n_ipv4_addrs) { - build_lrouter_force_snat_flows(lflows, od, "4", - od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s, - "dnat"); - } - if (od->dnat_force_snat_addrs.n_ipv6_addrs) { - build_lrouter_force_snat_flows(lflows, od, "6", - od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s, - "dnat"); - } - } - if (lb_force_snat_ip) { - if (od->lb_force_snat_addrs.n_ipv4_addrs) { - build_lrouter_force_snat_flows(lflows, od, "4", - od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); - } - if (od->lb_force_snat_addrs.n_ipv6_addrs) { - build_lrouter_force_snat_flows(lflows, od, "6", - od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); - } + } else { + /* Add the NAT external_ip to the nat_entries even for + * gateway routers. 
This is required for adding load balancer + * flows.*/ + sset_add(&nat_entries, nat->external_ip); + } + + /* S_ROUTER_OUT_UNDNAT */ + build_lrouter_out_undnat_flow(lflows, od, nat, match, actions, distributed, + mac, is_v6); + /* S_ROUTER_OUT_SNAT */ + build_lrouter_out_snat_flow(lflows, od, nat, match, actions, distributed, + mac, mask, cidr_bits, is_v6); + + /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */ + build_lrouter_ingress_flow(lflows, od, nat, match, actions, + mac, distributed, is_v6); + + /* Ingress Gateway Redirect Table: For NAT on a distributed + * router, add flows that are specific to a NAT rule. These + * flows indicate the presence of an applicable NAT rule that + * can be applied in a distributed manner. + * In particular REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to + * NAT external IP and NAT external mac so the ARP request + * generated in the following stage is sent out with proper IP/MAC + * src addresses. + */ + if (distributed) { + ds_clear(match); + ds_clear(actions); + ds_put_format(match, + "ip%s.src == %s && outport == %s && " + "is_chassis_resident(\"%s\")", + is_v6 ? "6" : "4", nat->logical_ip, + od->l3dgw_port->json_key, nat->logical_port); + ds_put_format(actions, "eth.src = %s; %s = %s; next;", + nat->external_mac, + is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4, + nat->external_ip); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, + 100, ds_cstr(match), + ds_cstr(actions), &nat->header_); + } + + /* Egress Loopback table: For NAT on a distributed router. + * If packets in the egress pipeline on the distributed + * gateway port have ip.dst matching a NAT external IP, then + * loop a clone of the packet back to the beginning of the + * ingress pipeline with inport = outport. */ + if (od->l3dgw_port) { + /* Distributed router. */ + ds_clear(match); + ds_put_format(match, "ip%s.dst == %s && outport == %s", + is_v6 ?
"6" : "4", + nat->external_ip, + od->l3dgw_port->json_key); + if (!distributed) { + ds_put_format(match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } else { + ds_put_format(match, " && is_chassis_resident(\"%s\")", + nat->logical_port); } - /* For gateway router, re-circulate every packet through - * the DNAT zone. This helps with the following. - * - * Any packet that needs to be unDNATed in the reverse - * direction gets unDNATed. Ideally this could be done in - * the egress pipeline. But since the gateway router - * does not have any feature that depends on the source - * ip address being external IP address for IP routing, - * we can do it here, saving a future re-circulation. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, - "ip", "flags.loopback = 1; ct_dnat;"); + ds_clear(actions); + ds_put_format(actions, + "clone { ct_clear; " + "inport = outport; outport = \"\"; " + "flags = 0; flags.loopback = 1; "); + for (int j = 0; j < MFF_N_LOG_REGS; j++) { + ds_put_format(actions, "reg%d = 0; ", j); + } + ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; " + "next(pipeline=ingress, table=%d); };", + ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, + ds_cstr(match), ds_cstr(actions), + &nat->header_); } + } - /* Load balancing and packet defrag are only valid on - * Gateway routers or router with gateway port. */ - if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { - sset_destroy(&nat_entries); - return; + /* Handle force SNAT options set in the gateway router. 
*/ + if (!od->l3dgw_port) { + if (dnat_force_snat_ip) { + if (od->dnat_force_snat_addrs.n_ipv4_addrs) { + build_lrouter_force_snat_flows(lflows, od, "4", + od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s, + "dnat"); + } + if (od->dnat_force_snat_addrs.n_ipv6_addrs) { + build_lrouter_force_snat_flows(lflows, od, "6", + od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s, + "dnat"); + } } - - /* A set to hold all ips that need defragmentation and tracking. */ - struct sset all_ips = SSET_INITIALIZER(&all_ips); - - for (int i = 0; i < od->nbr->n_load_balancer; i++) { - struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; - struct ovn_northd_lb *lb = - ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); - ovs_assert(lb); - - for (size_t j = 0; j < lb->n_vips; j++) { - struct ovn_lb_vip *lb_vip = &lb->vips[j]; - struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; - ds_clear(actions); - build_lb_vip_actions(lb_vip, lb_vip_nb, actions, - lb->selection_fields, false); - - if (!sset_contains(&all_ips, lb_vip->vip_str)) { - sset_add(&all_ips, lb_vip->vip_str); - /* If there are any load balancing rules, we should send - * the packet to conntrack for defragmentation and - * tracking. This helps with two things. - * - * 1. With tracking, we can send only new connections to - * pick a DNAT ip address from a group. - * 2. If there are L4 ports in load balancing rules, we - * need the defragmentation to match on L4 ports. */ - ds_clear(match); - if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { - ds_put_format(match, "ip && ip4.dst == %s", - lb_vip->vip_str); - } else { - ds_put_format(match, "ip && ip6.dst == %s", - lb_vip->vip_str); - } - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, - 100, ds_cstr(match), "ct_next;", - &nb_lb->header_); - } - - /* Higher priority rules are added for load-balancing in DNAT - * table. For every match (on a VIP[:port]), we add two flows - * via add_router_lb_flow(). 
One flow is for specific matching - * on ct.new with an action of "ct_lb($targets);". The other - * flow is for ct.est with an action of "ct_dnat;". */ - ds_clear(match); - if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { - ds_put_format(match, "ip && ip4.dst == %s", - lb_vip->vip_str); - } else { - ds_put_format(match, "ip && ip6.dst == %s", - lb_vip->vip_str); - } - - int prio = 110; - bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); - bool is_sctp = nullable_string_is_equal(nb_lb->protocol, - "sctp"); - const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp"; - - if (lb_vip->vip_port) { - ds_put_format(match, " && %s && %s.dst == %d", proto, - proto, lb_vip->vip_port); - prio = 120; - } - - if (od->l3redirect_port && - (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { - ds_put_format(match, " && is_chassis_resident(%s)", - od->l3redirect_port->json_key); - } - bool force_snat_for_lb = - lb_force_snat_ip || od->lb_force_snat_router_ip; - add_router_lb_flow(lflows, od, match, actions, prio, - force_snat_for_lb, lb_vip, proto, - nb_lb, meter_groups, &nat_entries); + if (lb_force_snat_ip) { + if (od->lb_force_snat_addrs.n_ipv4_addrs) { + build_lrouter_force_snat_flows(lflows, od, "4", + od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); + } + if (od->lb_force_snat_addrs.n_ipv6_addrs) { + build_lrouter_force_snat_flows(lflows, od, "6", + od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); } } - sset_destroy(&all_ips); + + /* For gateway router, re-circulate every packet through + * the DNAT zone. This helps with the following. + * + * Any packet that needs to be unDNATed in the reverse + * direction gets unDNATed. Ideally this could be done in + * the egress pipeline. But since the gateway router + * does not have any feature that depends on the source + * ip address being external IP address for IP routing, + * we can do it here, saving a future re-circulation. 
*/ + ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, + "ip", "flags.loopback = 1; ct_dnat;"); + } + + /* Load balancing and packet defrag are only valid on + * Gateway routers or router with gateway port. */ + if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { sset_destroy(&nat_entries); + return; } + + build_lrouter_lb_flows(lflows, od, lbs, meter_groups, &nat_entries, + match, actions); + + sset_destroy(&nat_entries); } diff --git a/ovn-nb.xml b/ovn-nb.xml index b0a4adffe..408c98090 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -1653,6 +1653,12 @@ exactly one IPv4 and/or one IPv6 address on it, separated by a space character. + + + If the load balancing rule is configured with skip_snat + option, the force_snat_for_lb option configured for the router + pipeline will not be applied for this load balancer. + + diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index 2cd3e261f..5c64fff12 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -431,3 +431,83 @@ OVS_WAIT_UNTIL([ OVN_CLEANUP([hv1]) AT_CLEANUP + +# Test that changes of a port binding from one type to another doesn't +# result in any ovn-controller asserts or crashes. +AT_SETUP([ovn-controller - port binding type change handling]) +AT_KEYWORDS([ovn]) +ovn_start + +net_add n1 +sim_add hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + +check ovn-nbctl ls-add ls1 -- lsp-add ls1 lsp1 + +as hv1 +check ovs-vsctl \ + -- add-port br-int vif1 \ + -- set Interface vif1 external_ids:iface-id=lsp1 + +# ovn-controller should bind the interface.
+wait_for_ports_up +hv_uuid=$(fetch_column Chassis _uuid name=hv1) +check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[1]] +primary lport : [[lsp1]] +---------------------------------------- +]) + +# pause ovn-northd +check as northd ovn-appctl -t ovn-northd pause +check as northd-backup ovn-appctl -t ovn-northd pause + +as northd ovn-appctl -t ovn-northd status +as northd-backup ovn-appctl -t ovn-northd status + +pb_types=(patch chassisredirect l3gateway localnet localport l2gateway + virtual external remote vtep) +for type in ${pb_types[[@]]} +do + for update_type in ${pb_types[[@]]} + do + check ovn-sbctl set port_binding lsp1 type=$type + check as hv1 ovs-vsctl set open . external_ids:ovn-cms-options=$type + OVS_WAIT_UNTIL([test $type = $(ovn-sbctl get chassis . other_config:ovn-cms-options)]) + + AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[0]] +---------------------------------------- +]) + + echo "Updating to $update_type from $type" + check ovn-sbctl set port_binding lsp1 type=$update_type + check as hv1 ovs-vsctl set open . external_ids:ovn-cms-options=$update_type + OVS_WAIT_UNTIL([test $update_type = $(ovn-sbctl get chassis . other_config:ovn-cms-options)]) + + AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[0]] +---------------------------------------- +]) + # Set the port binding type back to VIF. + check ovn-sbctl set port_binding lsp1 type=\"\" + check as hv1 ovs-vsctl set open . external_ids:ovn-cms-options=foo + OVS_WAIT_UNTIL([test foo = $(ovn-sbctl get chassis . 
other_config:ovn-cms-options)]) + + AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[lsp1]], OVS interface name : [[vif1]], num binding lports : [[1]] +primary lport : [[lsp1]] +---------------------------------------- +]) + done +done + +OVN_CLEANUP([hv1]) +AT_CLEANUP diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index b78baa708..6d5dce668 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2551,7 +2551,7 @@ wait_row_count nb:Logical_Switch_Port 1 up=false name=lsp1 AT_CLEANUP -AT_SETUP([ovn -- lb_force_snat_ip for Gateway Routers]) +AT_SETUP([ovn -- Load Balancers and lb_force_snat_ip for Gateway Routers]) ovn_start check ovn-nbctl ls-add sw0 @@ -2589,11 +2589,11 @@ AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl table=5 (lr_in_unsnat ), priority=0 , match=(1), action=(next;) ]) -AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl -]) - - -AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl + table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(ct_dnat;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(ct_lb(backends=10.0.0.4:8080);) + table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) ]) check ovn-nbctl --wait=sb set logical_router lr0 options:lb_force_snat_ip="20.0.0.4 aef0::4" @@ -2608,14 +2608,18 @@ AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl table=5 (lr_in_unsnat ), priority=110 , match=(ip6 && ip6.dst == aef0::4), action=(ct_snat;) ]) -AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl + table=6 (lr_in_dnat ), priority=0 , 
match=(1), action=(next;) table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_dnat;) table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_lb(backends=10.0.0.4:8080);) + table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) ]) -AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl + table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) table=1 (lr_out_snat ), priority=100 , match=(flags.force_snat_for_lb == 1 && ip4), action=(ct_snat(20.0.0.4);) table=1 (lr_out_snat ), priority=100 , match=(flags.force_snat_for_lb == 1 && ip6), action=(ct_snat(aef0::4);) + table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) ]) check ovn-nbctl --wait=sb set logical_router lr0 options:lb_force_snat_ip="router_ip" @@ -2633,15 +2637,19 @@ AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip4.dst == 20.0.0.1), action=(ct_snat;) ]) -AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl + table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_dnat;) table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_lb(backends=10.0.0.4:8080);) + table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) ]) -AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], 
[dnl + table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-public"), action=(ct_snat(172.168.0.100);) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw0"), action=(ct_snat(10.0.0.1);) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw1"), action=(ct_snat(20.0.0.1);) + table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) ]) check ovn-nbctl --wait=sb remove logical_router lr0 options chassis @@ -2653,7 +2661,9 @@ AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl table=5 (lr_in_unsnat ), priority=0 , match=(1), action=(next;) ]) -AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl + table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) + table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) ]) check ovn-nbctl set logical_router lr0 options:chassis=ch1 @@ -2670,16 +2680,43 @@ AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip6.dst == bef0::1), action=(ct_snat;) ]) -AT_CHECK([grep "lr_in_dnat" lr0flows | grep force_snat_for_lb | sort], [0], [dnl +AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl + table=6 (lr_in_dnat ), priority=0 , match=(1), action=(next;) table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_dnat;) table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80), action=(flags.force_snat_for_lb = 1; ct_lb(backends=10.0.0.4:8080);) + table=6 (lr_in_dnat ), priority=50 , match=(ip), action=(flags.loopback = 1; ct_dnat;) ]) -AT_CHECK([grep "lr_out_snat" lr0flows | grep force_snat_for_lb 
| sort], [0], [dnl +AT_CHECK([grep "lr_out_snat" lr0flows | sort], [0], [dnl + table=1 (lr_out_snat ), priority=0 , match=(1), action=(next;) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-public"), action=(ct_snat(172.168.0.100);) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw0"), action=(ct_snat(10.0.0.1);) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip4 && outport == "lr0-sw1"), action=(ct_snat(20.0.0.1);) table=1 (lr_out_snat ), priority=110 , match=(flags.force_snat_for_lb == 1 && ip6 && outport == "lr0-sw1"), action=(ct_snat(bef0::1);) + table=1 (lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + +check ovn-nbctl --wait=sb lb-add lb2 10.0.0.20:80 10.0.0.40:8080 +check ovn-nbctl --wait=sb set load_balancer lb2 options:skip_snat=true +check ovn-nbctl lr-lb-add lr0 lb2 +check ovn-nbctl --wait=sb lb-del lb1 +ovn-sbctl dump-flows lr0 > lr0flows + +AT_CHECK([grep "lr_in_unsnat" lr0flows | sort], [0], [dnl + table=5 (lr_in_unsnat ), priority=0 , match=(1), action=(next;) + table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-public" && ip4.dst == 172.168.0.100), action=(ct_snat;) + table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw0" && ip4.dst == 10.0.0.1), action=(ct_snat;) + table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip4.dst == 20.0.0.1), action=(ct_snat;) + table=5 (lr_in_unsnat ), priority=110 , match=(inport == "lr0-sw1" && ip6.dst == bef0::1), action=(ct_snat;) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | grep skip_snat_for_lb | sort], [0], [dnl + table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 10.0.0.20 && tcp && tcp.dst == 80), action=(flags.skip_snat_for_lb = 1; ct_dnat;) + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.20 && tcp && tcp.dst == 80), action=(flags.skip_snat_for_lb = 1; 
ct_lb(backends=10.0.0.40:8080);) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | grep skip_snat_for_lb | sort], [0], [dnl + table=1 (lr_out_snat ), priority=120 , match=(flags.skip_snat_for_lb == 1 && ip), action=(next;) ]) AT_CLEANUP diff --git a/tests/ovn.at b/tests/ovn.at index b465784cd..dbc6e549b 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -11494,6 +11494,59 @@ OVN_CLEANUP([hv1],[hv2]) AT_CLEANUP +AT_SETUP([ovn -- localport suppress gARP]) +ovn_start + +net_add n1 +sim_add hv1 +as hv1 +check ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + +check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys + +check ovn-nbctl ls-add ls \ + -- lsp-add ls lp \ + -- lsp-set-type lp localport \ + -- lsp-set-addresses lp "00:00:00:00:00:01 10.0.0.1" \ + -- lsp-add ls ln \ + -- lsp-set-type ln localnet \ + -- lsp-set-options ln network_name=phys \ + -- lsp-add ls lsp \ + -- lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2" + +dnl First bind the localport. +check ovs-vsctl add-port br-int vif1 \ + -- set Interface vif1 external-ids:iface-id=lp +check ovn-nbctl --wait=hv sync + +dnl Then bind the regular vif. +check ovs-vsctl add-port br-int vif2 \ + -- set Interface vif2 external-ids:iface-id=lsp \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap + +wait_for_ports_up lsp +check ovn-nbctl --wait=hv sync + +dnl Wait for at least two gARPs from lsp (10.0.0.2). +lsp_garp=ffffffffffff000000000002080600010800060400010000000000020a0000020000000000000a000002 +OVS_WAIT_UNTIL([ + garps=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/br-phys-tx.pcap | grep ${lsp_garp} -c` + test $garps -ge 2 +]) + +dnl At this point it's safe to assume that ovn-controller skipped sending gARP +dnl for the localport. Check that there are no other packets than the gARPs +dnl for the regular vif. 
+AT_CHECK([ + pkts=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/br-phys-tx.pcap | grep -v ${lsp_garp} -c` + test 0 -eq $pkts +]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP + AT_SETUP([ovn -- 1 LR with HA distributed router gateway port]) ovn_start @@ -16647,56 +16700,67 @@ ovs-vsctl -- add-port br-int hv2-vif2 -- \ ovn-nbctl ls-add sw0 -ovn-nbctl lsp-add sw0 sw0-vir -ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" -ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" -ovn-nbctl lsp-set-type sw0-vir virtual -ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 -ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 +check ovn-nbctl lsp-add sw0 sw0-vir +check ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" +check ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" +check ovn-nbctl lsp-set-type sw0-vir virtual +check ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 +check ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 -ovn-nbctl lsp-add sw0 sw0-p1 -ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" -ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3 10.0.0.10" +check ovn-nbctl lsp-add sw0 sw0-p1 +check ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" +check ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3 10.0.0.10" -ovn-nbctl lsp-add sw0 sw0-p2 -ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" -ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4 10.0.0.10" +check ovn-nbctl lsp-add sw0 sw0-p2 +check ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" +check ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4 10.0.0.10" -ovn-nbctl lsp-add sw0 sw0-p3 -ovn-nbctl lsp-set-addresses sw0-p3 "50:54:00:00:00:05 10.0.0.5" -ovn-nbctl lsp-set-port-security sw0-p3 "50:54:00:00:00:05 
10.0.0.5 10.0.0.10" +check ovn-nbctl lsp-add sw0 sw0-p3 +check ovn-nbctl lsp-set-addresses sw0-p3 "50:54:00:00:00:05 10.0.0.5" +check ovn-nbctl lsp-set-port-security sw0-p3 "50:54:00:00:00:05 10.0.0.5 10.0.0.10" # Create the second logical switch with one port -ovn-nbctl ls-add sw1 -ovn-nbctl lsp-add sw1 sw1-p1 -ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" -ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" +check ovn-nbctl ls-add sw1 +check ovn-nbctl lsp-add sw1 sw1-p1 +check ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" +check ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" # Create a logical router and attach both logical switches -ovn-nbctl lr-add lr0 -ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 -ovn-nbctl lsp-add sw0 sw0-lr0 -ovn-nbctl lsp-set-type sw0-lr0 router -ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 -ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 +check ovn-nbctl lr-add lr0 +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 +check ovn-nbctl lsp-add sw0 sw0-lr0 +check ovn-nbctl lsp-set-type sw0-lr0 router +check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 +check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 -ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 -ovn-nbctl lsp-add sw1 sw1-lr0 -ovn-nbctl lsp-set-type sw1-lr0 router -ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 -ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 +check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 +check ovn-nbctl lsp-add sw1 sw1-lr0 +check ovn-nbctl lsp-set-type sw1-lr0 router +check ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 +check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 -OVN_POPULATE_ARP +# Add an ACL that matches on sw0-vir being bound locally. +check ovn-nbctl acl-add sw0 to-lport 1000 'is_chassis_resident("sw0-vir") && ip' allow -# Delete sw0-vir and add again. 
-ovn-nbctl lsp-del sw0-vir +check ovn-nbctl ls-add public +check ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 +check ovn-nbctl lsp-add public public-lr0 +check ovn-nbctl lsp-set-type public-lr0 router +check ovn-nbctl lsp-set-addresses public-lr0 router +check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public -ovn-nbctl lsp-add sw0 sw0-vir -ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" -ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" -ovn-nbctl lsp-set-type sw0-vir virtual -ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 -ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 +# localnet port +check ovn-nbctl lsp-add public ln-public +check ovn-nbctl lsp-set-type ln-public localnet +check ovn-nbctl lsp-set-addresses ln-public unknown +check ovn-nbctl lsp-set-options ln-public network_name=public + +# schedule the gw router port to a chassis. Change the name of the chassis +check ovn-nbctl --wait=hv lrp-set-gateway-chassis lr0-public hv1 20 + +check ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.168.0.50 10.0.0.10 sw0-vir 10:54:00:00:00:10 + +OVN_POPULATE_ARP wait_for_ports_up ovn-nbctl --wait=hv sync @@ -16746,6 +16810,30 @@ ovs-vsctl del-port hv1-vif3 AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding \ logical_port=sw0-vir) = x], [0], []) +check_virtual_offlows_present() { + hv=$1 + + AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | grep "priority=2000"], [0], [dnl + table=45, priority=2000,ip,metadata=0x1 actions=resubmit(,46) + table=45, priority=2000,ipv6,metadata=0x1 actions=resubmit(,46) +]) + + AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=11 | ofctl_strip_all | \ + grep "priority=92" | grep 172.168.0.50], [0], [dnl + table=11, priority=92,arp,reg14=0x3,metadata=0x3,arp_tpa=172.168.0.50,arp_op=1 
actions=move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],mod_dl_src:10:54:00:00:00:10,load:0x2->NXM_OF_ARP_OP[[]],move:NXM_NX_ARP_SHA[[]]->NXM_NX_ARP_THA[[]],load:0x105400000010->NXM_NX_ARP_SHA[[]],move:NXM_OF_ARP_SPA[[]]->NXM_OF_ARP_TPA[[]],load:0xaca80032->NXM_OF_ARP_SPA[[]],move:NXM_NX_REG14[[]]->NXM_NX_REG15[[]],load:0x1->NXM_NX_REG10[[0]],resubmit(,37) +]) +} + +check_virtual_offlows_not_present() { + hv=$1 + AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | grep "priority=2000"], [1], [dnl +]) + + AT_CHECK([as $hv ovs-ofctl dump-flows br-int table=11 | ofctl_strip_all | \ + grep "priority=92" | grep 172.168.0.50], [1], [dnl +]) +} + # From sw0-p0 send GARP for 10.0.0.10. hv1 should claim sw0-vir # and sw0-p1 should be its virtual_parent. eth_src=505400000003 @@ -16767,6 +16855,13 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows2 | grep "reg0 == 10.0.0.10" | sed 's/ table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) ]) +# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + # Forcibly clear virtual_parent. ovn-controller should release the binding # gracefully. pb_uuid=$(ovn-sbctl --bare --columns _uuid find port_binding logical_port=sw0-vir) @@ -16777,6 +16872,13 @@ logical_port=sw0-vir) = x]) wait_row_count nb:Logical_Switch_Port 1 up=false name=sw0-vir +check ovn-nbctl --wait=hv sync +# hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir. +check_virtual_offlows_not_present hv1 + +# hv2 should not have the flow for ACL. +check_virtual_offlows_not_present hv2 + # From sw0-p0 resend GARP for 10.0.0.10. hv1 should reclaim sw0-vir # and sw0-p1 should be its virtual_parent. 
send_garp 1 1 $eth_src $eth_dst $spa $tpa @@ -16789,6 +16891,58 @@ logical_port=sw0-vir) = xsw0-p1]) wait_for_ports_up sw0-vir +check ovn-nbctl --wait=hv sync +# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + +# Release sw0-p1. +as hv1 ovs-vsctl set interface hv1-vif1 external-ids:iface-id=sw0-px +wait_column "false" nb:Logical_Switch_Port up name=sw0-p1 +wait_column "false" nb:Logical_Switch_Port up name=sw0-vir + +check ovn-nbctl --wait=hv sync +# hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_not_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + +# Claim sw0-p1 again. +as hv1 ovs-vsctl set interface hv1-vif1 external-ids:iface-id=sw0-p1 +wait_for_ports_up sw0-p1 + +# hv1 should not have the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_not_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + +# From sw0-p0 send GARP for 10.0.0.10. hv1 should claim sw0-vir +# and sw0-p1 should be its virtual_parent. +eth_src=505400000003 +eth_dst=ffffffffffff +spa=$(ip_to_hex 10 0 0 10) +tpa=$(ip_to_hex 10 0 0 10) +send_garp 1 1 $eth_src $eth_dst $spa $tpa + +wait_row_count Port_Binding 1 logical_port=sw0-vir chassis=$hv1_ch_uuid +check_row_count Port_Binding 1 logical_port=sw0-vir virtual_parent=sw0-p1 +wait_for_ports_up sw0-vir +check ovn-nbctl --wait=hv sync + +# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv1 + +# hv2 should not have the above flows. 
+check_virtual_offlows_not_present hv2 + # From sw0-p3 send GARP for 10.0.0.10. hv1 should claim sw0-vir # and sw0-p3 should be its virtual_parent. eth_src=505400000005 @@ -16806,8 +16960,8 @@ logical_port=sw0-vir) = xsw0-p3]) wait_for_ports_up sw0-vir # There should be an arp resolve flow to resolve the virtual_ip with the -# sw0-p2's MAC. -sleep 1 +# sw0-p3's MAC. +check ovn-nbctl --wait=hv sync ovn-sbctl dump-flows lr0 > lr0-flows3 AT_CAPTURE_FILE([lr0-flows3]) cp ovn-sb/ovn-sb.db lr0-flows3.db @@ -16815,6 +16969,13 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows3 | grep "reg0 == 10.0.0.10" | sed 's table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:05; next;) ]) +# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + # send the garp from sw0-p2 (in hv2). hv2 should claim sw0-vir # and sw0-p2 shpuld be its virtual_parent. eth_src=505400000004 @@ -16832,14 +16993,21 @@ logical_port=sw0-vir) = xsw0-p2]) wait_for_ports_up sw0-vir # There should be an arp resolve flow to resolve the virtual_ip with the -# sw0-p3's MAC. -sleep 1 +# sw0-p2's MAC. +check ovn-nbctl --wait=hv sync ovn-sbctl dump-flows lr0 > lr0-flows4 AT_CAPTURE_FILE([lr0-flows4]) AT_CHECK([grep lr_in_arp_resolve lr0-flows4 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) ]) +# hv2 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv2 + +# hv1 should not have the above flows. +check_virtual_offlows_not_present hv1 + # Now send arp reply from sw0-p1. 
hv1 should claim sw0-vir # and sw0-p1 shpuld be its virtual_parent. eth_src=505400000003 @@ -16863,6 +17031,14 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows5 | grep "reg0 == 10.0.0.10" | sed 's/ table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) ]) +check ovn-nbctl --wait=hv sync +# hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + # Delete hv1-vif1 port. hv1 should release sw0-vir as hv1 ovs-vsctl del-port hv1-vif1 @@ -16883,6 +17059,15 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows6 | grep "reg0 == 10.0.0.10" | sed 's/ table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) ]) +check ovn-nbctl --wait=hv sync +# hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_not_present hv1 + +# hv2 should not have the above flows. +check_virtual_offlows_not_present hv2 + + # Now send arp reply from sw0-p2. hv2 should claim sw0-vir # and sw0-p2 should be its virtual_parent. eth_src=505400000004 @@ -16906,6 +17091,14 @@ AT_CHECK([grep lr_in_arp_resolve lr0-flows7 | grep "reg0 == 10.0.0.10" | sed 's/ table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) ]) +check ovn-nbctl --wait=hv sync +# hv2 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_present hv2 + +# hv1 should not have the above flows. 
+check_virtual_offlows_not_present hv1 + # Delete sw0-p2 logical port ovn-nbctl lsp-del sw0-p2 @@ -16933,6 +17126,14 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows3 | grep bind_vport | sed 's/table=../tabl table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p3" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) ]) +check ovn-nbctl --wait=hv sync +# hv2 should remove the flow for the ACL with is_chassis_resident check for sw0-vir and +# arp responder flow in lr0 pipeline. +check_virtual_offlows_not_present hv2 + +# hv1 should not have the above flows. +check_virtual_offlows_not_present hv1 + ovn-nbctl --wait=hv remove logical_switch_port sw0-vir options virtual-parents ovn-sbctl dump-flows sw0 > sw0-flows4 AT_CAPTURE_FILE([sw0-flows4]) @@ -16942,6 +17143,38 @@ ovn-sbctl dump-flows lr0 > lr0-flows8 AT_CAPTURE_FILE([lr0-flows8]) AT_CHECK([grep lr_in_arp_resolve lr0-flows8 | grep "reg0 == 10.0.0.10"], [1]) +# Delete sw0-vir and add again. +ovn-nbctl lsp-del sw0-vir + +ovn-nbctl lsp-add sw0 sw0-vir +ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" +ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" +ovn-nbctl lsp-set-type sw0-vir virtual +ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 +ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 + +ovn-nbctl --wait=hv sync + +# Check that logical flows are added for sw0-vir in ls_in_arp_rsp pipeline +# with bind_vport action.
+ +ovn-sbctl dump-flows sw0 > sw0-flows +AT_CAPTURE_FILE([sw0-flows]) + +AT_CHECK([grep ls_in_arp_rsp sw0-flows | grep bind_vport | sed 's/table=../table=??/' | sort], [0], [dnl + table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p1" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) + table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p3" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) +]) + +ovn-sbctl dump-flows lr0 > lr0-flows +AT_CAPTURE_FILE([lr0-flows]) + +# Since the sw0-vir is not claimed by any chassis, eth.dst should be set to +# zero if the ip4.dst is the virtual ip in the router pipeline. +AT_CHECK([grep lr_in_arp_resolve lr0-flows | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl + table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) +]) + OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP @@ -24918,3 +25151,633 @@ AT_CHECK([cat hv2_offlows_table72.txt | grep -v NXST], [1], [dnl OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP + +AT_SETUP([ovn -- container port changed to normal port and then deleted]) +ovn_start + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int vm1 + +check ovn-nbctl ls-add ls +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl lsp-add ls vm-cont vm1 1 +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 + +wait_for_ports_up + +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl clear logical_switch_port vm-cont parent_name +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=foo +check ovn-nbctl lsp-del vm-cont +check as hv1 ovn-appctl -t ovn-controller debug/resume + +ovn-nbctl --wait=hv sync + +# Make 
sure that ovn-controller has not asserted. +AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +wait_column "false" nb:Logical_Switch_Port up name=vm1 + +check ovn-nbctl lsp-add ls vm-cont1 vm1 1 +check ovn-nbctl lsp-add ls vm-cont2 vm1 2 + +check ovn-nbctl --wait=sb lsp-del vm1 + +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl clear logical_switch_port vm-cont1 parent_name +check ovn-nbctl clear logical_switch_port vm-cont2 parent_name + +check as hv1 ovn-appctl -t ovn-controller debug/resume + +check ovn-nbctl --wait=hv sync + +# Make sure that ovn-controller has not crashed. +AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl set logical_switch_port vm-cont1 parent_name=vm1 +check ovn-nbctl --wait=sb set logical_switch_port vm-cont2 parent_name=vm1 +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 + +wait_for_ports_up + +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl --wait=sb lsp-del vm1 +check ovn-nbctl clear logical_switch_port vm-cont1 parent_name +check ovn-nbctl --wait=sb clear logical_switch_port vm-cont2 parent_name +check ovn-nbctl lsp-del vm-cont1 +check ovn-nbctl --wait=sb lsp-del vm-cont2 +check as hv1 ovn-appctl -t ovn-controller debug/resume + +check ovn-nbctl --wait=hv sync + +# Make sure that ovn-controller has not crashed. +AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl lsp-add ls vm-cont1 vm1 1 +check ovn-nbctl lsp-add ls vm-cont2 vm1 2 + +wait_for_ports_up + +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl clear logical_switch_port vm-cont1 parent_name +check ovn-nbctl --wait=sb clear logical_switch_port vm-cont2 parent_name +check ovn-nbctl lsp-del vm-cont1 +check ovn-nbctl lsp-del vm-cont2 +check as hv1 ovn-appctl -t ovn-controller debug/resume + +check ovn-nbctl --wait=hv sync + +# Make sure that ovn-controller has not crashed. 
+AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +check ovn-nbctl lsp-add ls vm-cont1 vm1 1 +check ovn-nbctl lsp-add ls vm-cont2 vm1 2 + +wait_for_ports_up + +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl clear logical_switch_port vm-cont1 parent_name +check ovn-nbctl --wait=sb clear logical_switch_port vm-cont2 parent_name + +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=foo + +check as hv1 ovn-appctl -t ovn-controller debug/resume + +wait_column "false" nb:Logical_Switch_Port up name=vm1 +wait_column "false" nb:Logical_Switch_Port up name=vm-cont1 +wait_column "false" nb:Logical_Switch_Port up name=vm-cont2 + +check ovn-nbctl set logical_switch_port vm-cont1 parent_name=vm1 +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 +check ovn-nbctl --wait=sb set logical_switch_port vm-cont2 parent_name=vm1 + +wait_for_ports_up + +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl clear logical_switch_port vm-cont1 parent_name +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm-cont1 +check as hv1 ovn-appctl -t ovn-controller debug/resume + +wait_column "false" nb:Logical_Switch_Port up name=vm1 +wait_column "true" nb:Logical_Switch_Port up name=vm-cont1 +wait_column "false" nb:Logical_Switch_Port up name=vm-cont2 + +check ovn-nbctl --wait=sb set logical_switch_port vm-cont2 parent_name=vm-cont1 +check ovn-nbctl --wait=sb set logical_switch_port vm1 parent_name=vm-cont1 + +wait_for_ports_up + +# Delete vm1, vm-cont1 and vm-cont2 and recreate again. +check ovn-nbctl lsp-del vm1 +check ovn-nbctl lsp-del vm-cont1 +check ovn-nbctl --wait=hv lsp-del vm-cont2 + +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl lsp-add ls vm-cont1 vm1 1 +check ovn-nbctl lsp-add ls vm-cont2 vm1 2 + +wait_for_ports_up + +# Make vm1 a child port of a nonexistent lport - bar. vm1, vm-cont1 and +# vm-cont2 should be released.
+check ovn-nbctl --wait=sb set logical_switch_port vm1 parent_name=bar +wait_column "false" nb:Logical_Switch_Port up name=vm1 +wait_column "false" nb:Logical_Switch_Port up name=vm-cont1 +wait_column "false" nb:Logical_Switch_Port up name=vm-cont2 + +OVN_CLEANUP([hv1]) +AT_CLEANUP + +AT_SETUP([ovn -- container port changed from one parent to another]) +ovn_start + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int vm1 -- set interface vm1 ofport-request=1 +ovs-vsctl -- add-port br-int vm2 -- set interface vm2 ofport-request=2 + +check ovn-nbctl ls-add ls +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl lsp-add ls vm1-cont vm1 1 +check ovn-nbctl lsp-add ls vm2 +check ovn-nbctl lsp-add ls vm2-cont vm2 2 + +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 +check as hv1 ovs-vsctl set Interface vm2 external_ids:iface-id=vm2 + +wait_for_ports_up + +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=1], [0], [dnl +1 +]) + +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=2], [0], [dnl +1 +]) + +# change the parent of vm1-cont to vm2.
+as hv1 ovn-appctl -t ovn-controller vlog/set dbg +check ovn-nbctl --wait=sb set logical_switch_port vm1-cont parent_name=vm2 \ +-- set logical_switch_port vm1-cont tag_request=3 + +wait_for_ports_up + +check ovn-nbctl --wait=hv sync + +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=1], [1], [dnl +0 +]) + +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=2], [0], [dnl +1 +]) + +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c dl_vlan=3], [0], [dnl +1 +]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP + +AT_SETUP([ovn -- container port use-after-free test]) +ovn_start + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int vm1 + +check ovn-nbctl ls-add ls +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl lsp-add ls vm-cont vm1 1 +check ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 +check ovn-nbctl clear logical_switch_port vm-cont parent_name +check ovs-vsctl set Interface vm1 external_ids:iface-id=foo +check ovn-nbctl lsp-del vm-cont +check ovn-nbctl ls-del ls +check ovn-nbctl ls-add ls +check ovn-nbctl lsp-add ls vm1 +check ovn-nbctl lsp-add ls vm-cont vm1 1 +check ovs-vsctl set Interface vm1 external_ids:iface-id=vm1 +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl clear logical_switch_port vm-cont parent_name +check ovn-nbctl lsp-del vm-cont +check as hv1 ovn-appctl -t ovn-controller debug/resume +check as hv1 ovs-vsctl set Interface vm1 external_ids:iface-id=foo + +ovn-nbctl --wait=hv sync + +# Make sure that ovn-controller has not asserted, i.e. its process still exists. +AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +wait_column "false" nb:Logical_Switch_Port up name=vm1 + +OVN_CLEANUP([hv1]) +AT_CLEANUP + +# Test that OVS.external_ids:iface-id doesn't affect non-VIF port bindings.
+AT_SETUP([ovn -- Non-VIF ports incremental processing]) +ovn_start + +net_add n1 +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.10 + +check ovn-nbctl ls-add ls1 -- lsp-add ls1 lsp1 + +as hv1 +check ovs-vsctl \ + -- add-port br-int vif1 \ + -- set Interface vif1 external_ids:iface-id=lsp1 + +# ovn-controller should bind the interface. +wait_for_ports_up +hv_uuid=$(fetch_column Chassis _uuid name=hv1) +check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 + +# Change the port type to router, ovn-controller should release it. +check ovn-nbctl --wait=hv lsp-set-type lsp1 router +check_column "" Port_Binding chassis logical_port=lsp1 + +# Clear port type, ovn-controller should rebind it. +check ovn-nbctl --wait=hv lsp-set-type lsp1 '' +check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 + +# Change the port type to localnet, ovn-controller should release it. +check ovn-nbctl --wait=hv lsp-set-type lsp1 localnet +check_column "" Port_Binding chassis logical_port=lsp1 + +# Clear port type, ovn-controller should rebind it. +check ovn-nbctl --wait=hv lsp-set-type lsp1 '' +check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 + +# Change the port type to localport, ovn-controller should release it. +check ovn-nbctl --wait=hv lsp-set-type lsp1 localport +check_column "" Port_Binding chassis logical_port=lsp1 + +# Clear port type, ovn-controller should rebind it. +check ovn-nbctl --wait=hv lsp-set-type lsp1 '' +check_column "$hv_uuid" Port_Binding chassis logical_port=lsp1 + +# Change the port type to localport and then delete it. +# ovn-controller should handle this properly. +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl --wait=sb lsp-set-type lsp1 localport +check ovn-nbctl --wait=sb lsp-del lsp1 +check as hv1 ovn-appctl -t ovn-controller debug/resume + +check ovn-nbctl --wait=hv sync + +# Make sure that ovn-controller has not asserted.
+AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +check ovn-nbctl lsp-add ls1 lsp1 +wait_for_ports_up + +# Change the port type to virtual and then delete it. +# ovn-controller should handle this properly. +check as hv1 ovn-appctl -t ovn-controller debug/pause +check ovn-nbctl --wait=sb lsp-set-type lsp1 virtual +check ovn-nbctl --wait=sb lsp-del lsp1 +check as hv1 ovn-appctl -t ovn-controller debug/resume + +check ovn-nbctl --wait=hv sync + +# Make sure that ovn-controller has not asserted. +AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP + +# Tests that ovn-controller creates local bindings correctly by running +# ovn-appctl -t ovn-controller debug/dump-local-bindings. +# Ideally this test case should have been a unit test case. +AT_SETUP([ovn -- ovn-controller local bindings]) +ovn_start + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int hv1-vm1 + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +ovs-vsctl -- add-port br-int hv2-vm1 + +check ovn-nbctl ls-add sw0 +check ovn-nbctl lsp-add sw0 sw0p1 +check ovn-nbctl lsp-add sw0 sw0p2 + +check as hv1 ovs-vsctl set interface hv1-vm1 external_ids:iface-id=sw0p1 +check as hv2 ovs-vsctl set interface hv2-vm1 external_ids:iface-id=sw0p2 + +wait_for_ports_up + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] +primary lport : [[sw0p1]] +---------------------------------------- +]) + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +]) + +# Create an ovs interface in hv1 +check as hv1 ovs-vsctl add-port br-int hv1-vm2 -- set interface hv1-vm2 
external_ids:iface-id=sw1p1 +check ovn-nbctl --wait=hv sync +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] +primary lport : [[sw0p1]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[0]] +---------------------------------------- +]) + +# Create lport sw1p1 +check ovn-nbctl ls-add sw1 -- lsp-add sw1 sw1p1 + +wait_for_ports_up + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] +primary lport : [[sw0p1]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] +primary lport : [[sw1p1]] +---------------------------------------- +]) + +# Swap sw0p1 and sw0p2. +check as hv1 ovs-vsctl set interface hv1-vm1 external_ids:iface-id=sw0p2 +check as hv2 ovs-vsctl set interface hv2-vm1 external_ids:iface-id=sw0p1 + +check ovn-nbctl --wait=hv sync + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p2]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] +primary lport : [[sw1p1]] +---------------------------------------- +]) + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] +primary lport : [[sw0p1]] +---------------------------------------- +]) + +# Create child port for sw0p1 +check ovn-nbctl --wait=hv lsp-add sw0 sw0p1-c1 sw0p1 1 +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: 
[[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +primary lport : [[sw0p1]] +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] +primary lport : [[sw1p1]] +---------------------------------------- +]) + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv2-vm1]], num binding lports : [[2]] +primary lport : [[sw0p1]] +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +---------------------------------------- +]) + +# Create another child port for sw0p1 +check ovn-nbctl --wait=hv lsp-add sw0 sw0p1-c2 sw0p1 2 + +wait_for_ports_up + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[3]] +primary lport : [[sw0p1]] +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv1-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] +primary lport : [[sw1p1]] +---------------------------------------- +]) + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv2-vm1]], num binding lports : [[3]] +primary lport : [[sw0p1]] +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +]) + +# Swap sw0p1 and sw0p2 again. 
+check as hv1 ovs-vsctl set interface hv1-vm1 external_ids:iface-id=sw0p1 +check as hv2 ovs-vsctl set interface hv2-vm1 external_ids:iface-id=sw0p2 + +check ovn-nbctl --wait=hv sync + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[3]] +primary lport : [[sw0p1]] +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] +primary lport : [[sw1p1]] +---------------------------------------- +]) + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[3]] +primary lport : [[sw0p1]] +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +]) + +# Make sw0p1 as child port of non existent lport - foo +check ovn-nbctl --wait=hv set logical_switch_port sw0p1 parent_name=foo + +AT_CHECK([as hv1 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[hv1-vm1]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw1p1]], OVS interface name : [[hv1-vm2]], num binding lports : [[1]] +primary lport : [[sw1p1]] 
+---------------------------------------- +]) + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +]) + +# Change the lport type of sw0p2 to different types and make sure that +# local bindings are correct. + +hv2_uuid=$(fetch_column Chassis _uuid name=hv2) +check_column "$hv2_uuid" Port_Binding chassis logical_port=sw0p2 + +# Change the port type to router, ovn-controller should release it. +check ovn-nbctl --wait=hv lsp-set-type sw0p2 router +check_column "" Port_Binding chassis logical_port=sw0p2 + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] +---------------------------------------- +]) + +# change the port type to external from router. 
+check ovn-nbctl --wait=hv lsp-set-type sw0p2 external +check_column "" Port_Binding chassis logical_port=sw0p2 + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] +---------------------------------------- +]) + +# change the port type to localnet from external. +check ovn-nbctl --wait=hv lsp-set-type sw0p2 localnet +check_column "" Port_Binding chassis logical_port=sw0p2 + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] +---------------------------------------- +]) + +# Change the port type to localport from localnet. NOTE(review): the command that follows sets localnet again, not localport - confirm the intended type.
+check ovn-nbctl --wait=hv lsp-set-type sw0p2 localnet +check_column "" Port_Binding chassis logical_port=sw0p2 + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[0]] +---------------------------------------- +]) + +# change the port type back to vif. +check ovn-nbctl --wait=hv lsp-set-type sw0p2 "" +wait_column "$hv2_uuid" Port_Binding chassis logical_port=sw0p2 + +AT_CHECK([as hv2 ovn-appctl -t ovn-controller debug/dump-local-bindings], [0], [dnl +Local bindings: +name: [[foo]], OVS interface name : [[NULL]], num binding lports : [[1]] +no primary lport +child lport[[1]] : [[sw0p1]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p1]], OVS interface name : [[NULL]], num binding lports : [[2]] +no primary lport +child lport[[1]] : [[sw0p1-c1]], type : [[CONTAINER]] +child lport[[2]] : [[sw0p1-c2]], type : [[CONTAINER]] +---------------------------------------- +name: [[sw0p2]], OVS interface name : [[hv2-vm1]], num binding lports : [[1]] +primary lport : [[sw0p2]] +---------------------------------------- +]) + +OVN_CLEANUP([hv1], [hv2]) +AT_CLEANUP diff --git a/utilities/ovndb-servers.ocf b/utilities/ovndb-servers.ocf index 7351c7d64..eba9c97a1 100755 --- a/utilities/ovndb-servers.ocf +++ b/utilities/ovndb-servers.ocf @@ -259,6 +259,9 @@ ovsdb_server_notify() { ovn-nbctl -- --id=@conn_uuid create Connection \ target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTEN_ON_IP}" \ 
inactivity_probe=$INACTIVE_PROBE -- set NB_Global . connections=@conn_uuid + else + CONN_UID=$(sed -e 's/^\[//' -e 's/\]$//' <<< ${conn}) + ovn-nbctl set connection "${CONN_UID}" target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTEN_ON_IP}" fi conn=`ovn-sbctl get SB_global . connections` @@ -267,6 +270,9 @@ inactivity_probe=$INACTIVE_PROBE -- set NB_Global . connections=@conn_uuid ovn-sbctl -- --id=@conn_uuid create Connection \ target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTEN_ON_IP}" \ inactivity_probe=$INACTIVE_PROBE -- set SB_Global . connections=@conn_uuid + else + CONN_UID=$(sed -e 's/^\[//' -e 's/\]$//' <<< ${conn}) + ovn-sbctl set connection "${CONN_UID}" target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTEN_ON_IP}" fi else