diff --git a/.ovn.metadata b/.ovn.metadata index 391011c..8399f7b 100644 --- a/.ovn.metadata +++ b/.ovn.metadata @@ -1,5 +1,5 @@ 002450621b33c5690060345b0aac25bc2426d675 SOURCES/docutils-0.12.tar.gz -fe7e5ee85616c793341be24fc46c11ff320922a7 SOURCES/openvswitch-c9c602b.tar.gz +155f423dbb5434315caac2e453d7f1361c4ab747 SOURCES/openvswitch-2410b95.tar.gz cae717fbee361a235064a1d79b012b2590908f7c SOURCES/ovn-22.09.0.tar.gz d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz 6beb30f18ffac3de7689b7fd63e9a8a7d9c8df3a SOURCES/Sphinx-1.1.3.tar.gz diff --git a/SOURCES/ovn22.09.patch b/SOURCES/ovn22.09.patch index 4238f4a..8e656d1 100644 --- a/SOURCES/ovn22.09.patch +++ b/SOURCES/ovn22.09.patch @@ -28,6 +28,116 @@ index 2b0782aea..dc1ca5240 100755 then # testsuite.log is necessary for debugging. cat */_build/sub/tests/testsuite.log +diff --git a/.ci/ovn-kubernetes/Dockerfile b/.ci/ovn-kubernetes/Dockerfile +index e74b620be..7edf86a13 100644 +--- a/.ci/ovn-kubernetes/Dockerfile ++++ b/.ci/ovn-kubernetes/Dockerfile +@@ -47,9 +47,17 @@ RUN GO111MODULE=on go install github.com/ovn-org/libovsdb/cmd/modelgen@${LIBOVSD + # Clone OVN Kubernetes and build the binary based on the commit passed as argument + WORKDIR /root + RUN git clone https://github.com/ovn-org/ovn-kubernetes.git +-WORKDIR /root/ovn-kubernetes/go-controller ++WORKDIR /root/ovn-kubernetes + RUN git checkout ${OVNKUBE_COMMIT} && git log -n 1 + ++# Copy the ovn-kubernetes scripts from the OVN sources and apply any ++# custom changes if needed. ++RUN mkdir -p /tmp/ovn/.ci/ovn-kubernetes ++COPY .ci/ovn-kubernetes /tmp/ovn/.ci/ovn-kubernetes ++WORKDIR /tmp/ovn ++RUN .ci/ovn-kubernetes/prepare.sh /root/ovn-kubernetes ++ ++WORKDIR /root/ovn-kubernetes/go-controller + # Make sure we use the OVN NB/SB schema from the local code. + COPY --from=ovnbuilder /tmp/ovn/ovn-nb.ovsschema pkg/nbdb/ovn-nb.ovsschema + COPY --from=ovnbuilder /tmp/ovn/ovn-sb.ovsschema pkg/sbdb/ovn-sb.ovsschema +diff --git a/.ci/ovn-kubernetes/custom.patch b/.ci/ovn-kubernetes/custom.patch +new file mode 100644 +index 000000000..ea5dd7540 +--- /dev/null ++++ b/.ci/ovn-kubernetes/custom.patch +@@ -0,0 +1,31 @@ ++From 903eef2dd6f9fec818a580760f4757d8137b9974 Mon Sep 17 00:00:00 2001 ++From: Dumitru Ceara ++Date: Mon, 19 Dec 2022 12:18:55 +0100 ++Subject: [PATCH] DOWNSTREAM: Disable session affinity tests. ++ ++Commit https://github.com/ovn-org/ovn-kubernetes/commit/898d2f8f10c4 ++enabled affinity timeout tests but the underlying OVN feature is ++not supported in this branch. Disable affinity tests. ++ ++Signed-off-by: Dumitru Ceara ++--- ++ test/scripts/e2e-kind.sh | 3 +++ ++ 1 file changed, 3 insertions(+) ++ ++diff --git a/test/scripts/e2e-kind.sh b/test/scripts/e2e-kind.sh ++index 69959fa1b..c3b2a5c3e 100755 ++--- a/test/scripts/e2e-kind.sh +++++ b/test/scripts/e2e-kind.sh ++@@ -26,6 +26,9 @@ kube-proxy ++ should set TCP CLOSE_WAIT timeout ++ \[Feature:ProxyTerminatingEndpoints\] ++ +++# Disable session affinity tests completely. +++session affinity +++ ++ # NOT IMPLEMENTED; SEE DISCUSSION IN https://github.com/ovn-org/ovn-kubernetes/pull/1225 ++ named port.+\[Feature:NetworkPolicy\] ++ ++-- ++2.31.1 ++ +diff --git a/.ci/ovn-kubernetes/prepare.sh b/.ci/ovn-kubernetes/prepare.sh +new file mode 100755 +index 000000000..8fc9652af +--- /dev/null ++++ b/.ci/ovn-kubernetes/prepare.sh +@@ -0,0 +1,20 @@ ++#!/bin/bash ++ ++set -ev ++ ++ovnk8s_path=$1 ++topdir=$PWD ++ ++pushd ${ovnk8s_path} ++ ++# Add here any custom operations that need to performed on the ++# ovn-kubernetes cloned repo, e.g., custom patches. ++ ++# git apply --allow-empty is too new so not all git versions from major ++# distros support it, just check if the custom patch file is not empty ++# before applying it. ++[ -s ${topdir}/.ci/ovn-kubernetes/custom.patch ] && \ ++ git apply -v ${topdir}/.ci/ovn-kubernetes/custom.patch ++ ++popd # ${ovnk8s_path} ++exit 0 +diff --git a/.github/workflows/ovn-kubernetes.yml b/.github/workflows/ovn-kubernetes.yml +index ba6b291ff..34ff2cdda 100644 +--- a/.github/workflows/ovn-kubernetes.yml ++++ b/.github/workflows/ovn-kubernetes.yml +@@ -91,12 +91,19 @@ jobs: + go-version: ${{ env.GO_VERSION }} + id: go + ++ - name: Check out ovn ++ uses: actions/checkout@v3 ++ + - name: Check out ovn-kubernetes + uses: actions/checkout@v2 + with: + path: src/github.com/ovn-org/ovn-kubernetes + repository: ovn-org/ovn-kubernetes + ++ - name: Prepare ++ run: | ++ .ci/ovn-kubernetes/prepare.sh src/github.com/ovn-org/ovn-kubernetes ++ + - name: Set up environment + run: | + export GOPATH=$(go env GOPATH) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a59cd478..88c48dd2c 100644 --- a/.github/workflows/test.yml @@ -90,19 +200,56 @@ index 7a59cd478..88c48dd2c 100644 - { compiler: gcc, m32: m32, opts: --disable-ssl} steps: +diff --git a/Makefile.am b/Makefile.am +index 3b0df8393..f7758d114 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -85,12 +85,13 @@ EXTRA_DIST = \ + MAINTAINERS.rst \ + README.rst \ + NOTICE \ +- .cirrus.yml \ + .ci/linux-build.sh \ + .ci/linux-prepare.sh \ + .ci/osx-build.sh \ + .ci/osx-prepare.sh \ + .ci/ovn-kubernetes/Dockerfile \ ++ .ci/ovn-kubernetes/prepare.sh \ ++ .ci/ovn-kubernetes/custom.patch \ + .github/workflows/test.yml \ + .github/workflows/ovn-kubernetes.yml \ + boot.sh \ diff --git a/NEWS b/NEWS -index ef6a99fed..0392d8d23 100644 +index ef6a99fed..1a7a7855d 100644 --- a/NEWS +++ b/NEWS -@@ -1,3 +1,6 @@ -+OVN v22.09.1 - xx xxx xxxx +@@ -1,3 +1,10 @@ ++OVN v22.09.2 - xx xxx xxxx +-------------------------- + ++OVN v22.09.1 - 20 Dec 2022 ++-------------------------- ++ - Bug fixes ++ OVN v22.09.0 - 16 Sep 2022 -------------------------- - ovn-controller: Add configuration knob, through OVS external-id +diff --git a/build-aux/sodepends.py b/build-aux/sodepends.py +index 343fda1af..7b1f9c840 100755 +--- a/build-aux/sodepends.py ++++ b/build-aux/sodepends.py +@@ -63,7 +63,8 @@ def sodepends(include_info, filenames, dst): + continue + + # Open file. +- include_dirs = [info[0] for info in include_info] ++ include_dirs = [info[1] if len(info) == 2 else info[0] ++ for info in include_info] + fn = soutil.find_file(include_dirs, toplevel) + if not fn: + ok = False diff --git a/configure.ac b/configure.ac -index 765aacb17..c79d79ffe 100644 +index 765aacb17..408184649 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ @@ -110,7 +257,7 @@ index 765aacb17..c79d79ffe 100644 AC_PREREQ(2.63) -AC_INIT(ovn, 22.09.0, bugs@openvswitch.org) -+AC_INIT(ovn, 22.09.1, bugs@openvswitch.org) ++AC_INIT(ovn, 22.09.2, bugs@openvswitch.org) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_HEADERS([config.h]) @@ -297,10 +444,52 @@ index d898c8aa5..b5429eb58 100644 const struct sbrec_port_binding *, const struct sbrec_chassis *); diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c -index 43fbf2ba3..afefe61db 100644 +index 43fbf2ba3..a92fc895c 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c -@@ -658,7 +658,8 @@ update_ct_zones(const struct shash *binding_lports, +@@ -151,6 +151,14 @@ struct pending_pkt { + /* Registered ofctrl seqno type for nb_cfg propagation. */ + static size_t ofctrl_seq_type_nb_cfg; + ++/* Only set monitor conditions on tables that are available in the ++ * server schema. ++ */ ++#define sb_table_set_monitor_condition(idl, table, cond) \ ++ (sbrec_server_has_##table##_table(idl) \ ++ ? sbrec_##table##_set_condition(idl, cond) \ ++ : 0) ++ + static unsigned int + update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + const struct sbrec_chassis *chassis, +@@ -279,16 +287,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + + out:; + unsigned int cond_seqnos[] = { +- sbrec_port_binding_set_condition(ovnsb_idl, &pb), +- sbrec_logical_flow_set_condition(ovnsb_idl, &lf), +- sbrec_logical_dp_group_set_condition(ovnsb_idl, &ldpg), +- sbrec_mac_binding_set_condition(ovnsb_idl, &mb), +- sbrec_multicast_group_set_condition(ovnsb_idl, &mg), +- sbrec_dns_set_condition(ovnsb_idl, &dns), +- sbrec_controller_event_set_condition(ovnsb_idl, &ce), +- sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast), +- sbrec_igmp_group_set_condition(ovnsb_idl, &igmp), +- sbrec_chassis_private_set_condition(ovnsb_idl, &chprv), ++ sb_table_set_monitor_condition(ovnsb_idl, port_binding, &pb), ++ sb_table_set_monitor_condition(ovnsb_idl, logical_flow, &lf), ++ sb_table_set_monitor_condition(ovnsb_idl, logical_dp_group, &ldpg), ++ sb_table_set_monitor_condition(ovnsb_idl, mac_binding, &mb), ++ sb_table_set_monitor_condition(ovnsb_idl, multicast_group, &mg), ++ sb_table_set_monitor_condition(ovnsb_idl, dns, &dns), ++ sb_table_set_monitor_condition(ovnsb_idl, controller_event, &ce), ++ sb_table_set_monitor_condition(ovnsb_idl, ip_multicast, &ip_mcast), ++ sb_table_set_monitor_condition(ovnsb_idl, igmp_group, &igmp), ++ sb_table_set_monitor_condition(ovnsb_idl, chassis_private, &chprv), + }; + + unsigned int expected_cond_seqno = 0; +@@ -658,7 +666,8 @@ update_ct_zones(const struct shash *binding_lports, const char *user; struct sset all_users = SSET_INITIALIZER(&all_users); struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones); @@ -310,7 +499,7 @@ index 43fbf2ba3..afefe61db 100644 struct shash_node *shash_node; SHASH_FOR_EACH (shash_node, binding_lports) { -@@ -698,49 +699,46 @@ update_ct_zones(const struct shash *binding_lports, +@@ -698,49 +707,46 @@ update_ct_zones(const struct shash *binding_lports, bitmap_set0(ct_zone_bitmap, ct_zone->data); simap_delete(ct_zones, ct_zone); } else if (!simap_find(&req_snat_zones, ct_zone->name)) { @@ -383,7 +572,7 @@ index 43fbf2ba3..afefe61db 100644 } /* xxx This is wasteful to assign a zone to each port--even if no -@@ -758,6 +756,7 @@ update_ct_zones(const struct shash *binding_lports, +@@ -758,6 +764,7 @@ update_ct_zones(const struct shash *binding_lports, } simap_destroy(&req_snat_zones); @@ -391,7 +580,7 @@ index 43fbf2ba3..afefe61db 100644 sset_destroy(&all_users); shash_destroy(&all_lds); } -@@ -799,11 +798,36 @@ commit_ct_zones(const struct ovsrec_bridge *br_int, +@@ -799,11 +806,36 @@ commit_ct_zones(const struct ovsrec_bridge *br_int, } } @@ -429,7 +618,7 @@ index 43fbf2ba3..afefe61db 100644 const struct ovsrec_open_vswitch *cfg; cfg = ovsrec_open_vswitch_table_first(ovs_table); if (!cfg) { -@@ -829,14 +853,18 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, +@@ -829,14 +861,18 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, continue; } @@ -450,7 +639,7 @@ index 43fbf2ba3..afefe61db 100644 } } -@@ -2058,16 +2086,6 @@ out: +@@ -2058,16 +2094,6 @@ out: return true; } @@ -467,7 +656,7 @@ index 43fbf2ba3..afefe61db 100644 static void * en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED) { -@@ -2082,9 +2100,7 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED) +@@ -2082,9 +2108,7 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED) shash_init(&data->pending); simap_init(&data->current); @@ -478,7 +667,7 @@ index 43fbf2ba3..afefe61db 100644 return data; } -@@ -2111,6 +2127,12 @@ en_ct_zones_run(struct engine_node *node, void *data) +@@ -2111,6 +2135,12 @@ en_ct_zones_run(struct engine_node *node, void *data) struct ed_type_runtime_data *rt_data = engine_get_input_data("runtime_data", node); @@ -491,7 +680,7 @@ index 43fbf2ba3..afefe61db 100644 update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths, &ct_zones_data->current, ct_zones_data->bitmap, &ct_zones_data->pending); -@@ -2188,7 +2210,7 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data) +@@ -2188,7 +2218,7 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data) struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings; struct tracked_datapath *tdp; @@ -500,6 +689,14 @@ index 43fbf2ba3..afefe61db 100644 bool updated = false; +@@ -4197,6 +4227,7 @@ main(int argc, char *argv[]) + } + stopwatch_start(PINCTRL_RUN_STOPWATCH_NAME, + time_msec()); ++ pinctrl_update(ovnsb_idl_loop.idl, br_int->name); + pinctrl_run(ovnsb_idl_txn, + sbrec_datapath_binding_by_key, + sbrec_port_binding_by_datapath, diff --git a/controller/physical.c b/controller/physical.c index f3c8bddce..705146316 100644 --- a/controller/physical.c @@ -520,10 +717,98 @@ index f3c8bddce..705146316 100644 localnet_port->header_.uuid.parts[0], &match, ofpacts_p, &localnet_port->header_.uuid); diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 3f5d0af79..1e4230ed3 100644 +index 3f5d0af79..bcbb04eed 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c -@@ -4378,7 +4378,7 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, +@@ -173,6 +173,7 @@ struct pinctrl { + pthread_t pinctrl_thread; + /* Latch to destroy the 'pinctrl_thread' */ + struct latch pinctrl_thread_exit; ++ bool mac_binding_can_timestamp; + }; + + static struct pinctrl pinctrl; +@@ -544,6 +545,7 @@ pinctrl_init(void) + bfd_monitor_init(); + init_fdb_entries(); + pinctrl.br_int_name = NULL; ++ pinctrl.mac_binding_can_timestamp = false; + pinctrl_handler_seq = seq_create(); + pinctrl_main_seq = seq_create(); + +@@ -3519,7 +3521,7 @@ pinctrl_handler(void *arg_) + } + + static void +-pinctrl_set_br_int_name_(char *br_int_name) ++pinctrl_set_br_int_name_(const char *br_int_name) + OVS_REQUIRES(pinctrl_mutex) + { + if (br_int_name && (!pinctrl.br_int_name || strcmp(pinctrl.br_int_name, +@@ -3533,13 +3535,31 @@ pinctrl_set_br_int_name_(char *br_int_name) + } + + void +-pinctrl_set_br_int_name(char *br_int_name) ++pinctrl_set_br_int_name(const char *br_int_name) + { + ovs_mutex_lock(&pinctrl_mutex); + pinctrl_set_br_int_name_(br_int_name); + ovs_mutex_unlock(&pinctrl_mutex); + } + ++void ++pinctrl_update(const struct ovsdb_idl *idl, const char *br_int_name) ++{ ++ ovs_mutex_lock(&pinctrl_mutex); ++ pinctrl_set_br_int_name_(br_int_name); ++ ++ bool can_timestamp = sbrec_server_has_mac_binding_table_col_timestamp(idl); ++ if (can_timestamp != pinctrl.mac_binding_can_timestamp) { ++ pinctrl.mac_binding_can_timestamp = can_timestamp; ++ ++ /* Notify pinctrl_handler that mac binding timestamp column ++ * availability has changed. */ ++ notify_pinctrl_handler(); ++ } ++ ++ ovs_mutex_unlock(&pinctrl_mutex); ++} ++ + /* Called by ovn-controller. */ + void + pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, +@@ -3563,7 +3583,6 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + const struct shash *local_active_ports_ras) + { + ovs_mutex_lock(&pinctrl_mutex); +- pinctrl_set_br_int_name_(br_int->name); + run_put_mac_bindings(ovnsb_idl_txn, sbrec_datapath_binding_by_key, + sbrec_port_binding_by_key, + sbrec_mac_binding_by_lport_ip); +@@ -4245,12 +4264,17 @@ mac_binding_add_to_sb(struct ovsdb_idl_txn *ovnsb_idl_txn, + b = sbrec_mac_binding_insert(ovnsb_idl_txn); + sbrec_mac_binding_set_logical_port(b, logical_port); + sbrec_mac_binding_set_ip(b, ip); +- sbrec_mac_binding_set_mac(b, mac_string); + sbrec_mac_binding_set_datapath(b, dp); +- sbrec_mac_binding_set_timestamp(b, time_wall_msec()); +- } else if (strcmp(b->mac, mac_string)) { ++ } ++ ++ if (strcmp(b->mac, mac_string)) { + sbrec_mac_binding_set_mac(b, mac_string); +- sbrec_mac_binding_set_timestamp(b, time_wall_msec()); ++ ++ /* For backward compatibility check if timestamp column is available ++ * in SB DB. */ ++ if (pinctrl.mac_binding_can_timestamp) { ++ sbrec_mac_binding_set_timestamp(b, time_wall_msec()); ++ } + } + } + +@@ -4378,7 +4402,7 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, const struct sbrec_port_binding *pb; SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, sbrec_port_binding_by_datapath) { @@ -532,20 +817,452 @@ index 3f5d0af79..1e4230ed3 100644 continue; } struct buffered_packets *cur_qp; +diff --git a/controller/pinctrl.h b/controller/pinctrl.h +index d4f52e94d..cfece04da 100644 +--- a/controller/pinctrl.h ++++ b/controller/pinctrl.h +@@ -26,6 +26,7 @@ + struct hmap; + struct shash; + struct lport_index; ++struct ovsdb_idl; + struct ovsdb_idl_index; + struct ovsdb_idl_txn; + struct ovsrec_bridge; +@@ -57,7 +58,8 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + const struct shash *local_active_ports_ras); + void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn); + void pinctrl_destroy(void); +-void pinctrl_set_br_int_name(char *br_int_name); ++void pinctrl_set_br_int_name(const char *br_int_name); ++void pinctrl_update(const struct ovsdb_idl *idl, const char *br_int_name); + + struct activated_port { + uint32_t dp_key; diff --git a/debian/changelog b/debian/changelog -index 267e12baa..5ed83900b 100644 +index 267e12baa..08cc66fc0 100644 --- a/debian/changelog +++ b/debian/changelog -@@ -1,3 +1,9 @@ +@@ -1,3 +1,15 @@ ++OVN (22.09.2-1) unstable; urgency=low ++ [ OVN team ] ++ * New upstream version ++ ++ -- OVN team Tue, 20 Dec 2022 13:53:56 -0500 ++ +OVN (22.09.1-1) unstable; urgency=low + [ OVN team ] + * New upstream version + -+ -- OVN team Fri, 16 Sep 2022 13:54:11 -0400 ++ -- OVN team Tue, 20 Dec 2022 13:53:56 -0500 + ovn (22.09.0-1) unstable; urgency=low * New upstream version +diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c +index e5c193d9d..9a80a7f68 100644 +--- a/ic/ovn-ic.c ++++ b/ic/ovn-ic.c +@@ -71,6 +71,7 @@ struct ic_context { + struct ovsdb_idl_index *icsbrec_port_binding_by_az; + struct ovsdb_idl_index *icsbrec_port_binding_by_ts; + struct ovsdb_idl_index *icsbrec_port_binding_by_ts_az; ++ struct ovsdb_idl_index *icsbrec_route_by_az; + struct ovsdb_idl_index *icsbrec_route_by_ts; + struct ovsdb_idl_index *icsbrec_route_by_ts_az; + }; +@@ -756,6 +757,7 @@ port_binding_run(struct ic_context *ctx, + } + icsbrec_port_binding_index_destroy_row(isb_pb_key); + ++ const struct sbrec_port_binding *sb_pb; + const struct icnbrec_transit_switch *ts; + ICNBREC_TRANSIT_SWITCH_FOR_EACH (ts, ctx->ovninb_idl) { + const struct nbrec_logical_switch *ls = find_ts_in_nb(ctx, ts->name); +@@ -787,9 +789,9 @@ port_binding_run(struct ic_context *ctx, + for (int i = 0; i < ls->n_ports; i++) { + lsp = ls->ports[i]; + +- const struct sbrec_port_binding *sb_pb = find_lsp_in_sb(ctx, lsp); + if (!strcmp(lsp->type, "router")) { + /* The port is local. */ ++ sb_pb = find_lsp_in_sb(ctx, lsp); + if (!sb_pb) { + continue; + } +@@ -806,6 +808,7 @@ port_binding_run(struct ic_context *ctx, + if (!isb_pb) { + nbrec_logical_switch_update_ports_delvalue(ls, lsp); + } else { ++ sb_pb = find_lsp_in_sb(ctx, lsp); + if (!sb_pb) { + continue; + } +@@ -881,17 +884,18 @@ ic_route_hash(const struct in6_addr *prefix, unsigned int plen, + static struct ic_route_info * + ic_route_find(struct hmap *routes, const struct in6_addr *prefix, + unsigned int plen, const struct in6_addr *nexthop, +- const char *origin, char *route_table) ++ const char *origin, const char *route_table, uint32_t hash) + { + struct ic_route_info *r; +- uint32_t hash = ic_route_hash(prefix, plen, nexthop, origin, route_table); ++ if (!hash) { ++ hash = ic_route_hash(prefix, plen, nexthop, origin, route_table); ++ } + HMAP_FOR_EACH_WITH_HASH (r, node, hash, routes) { + if (ipv6_addr_equals(&r->prefix, prefix) && + r->plen == plen && + ipv6_addr_equals(&r->nexthop, nexthop) && + !strcmp(r->origin, origin) && +- !strcmp(r->route_table ? r->route_table : "", route_table) && +- ipv6_addr_equals(&r->nexthop, nexthop)) { ++ !strcmp(r->route_table ? r->route_table : "", route_table)) { + return r; + } + } +@@ -942,8 +946,8 @@ add_to_routes_learned(struct hmap *routes_learned, + } + const char *origin = smap_get_def(&nb_route->options, "origin", ""); + if (ic_route_find(routes_learned, &prefix, plen, &nexthop, origin, +- nb_route->route_table)) { +- /* Route is already added to learned in previous iteration. */ ++ nb_route->route_table, 0)) { ++ /* Route was added to learned on previous iteration. */ + return true; + } + +@@ -1090,20 +1094,44 @@ route_need_advertise(const char *policy, + } + + static void +-add_to_routes_ad(struct hmap *routes_ad, +- const struct nbrec_logical_router_static_route *nb_route, +- const struct lport_addresses *nexthop_addresses, +- const struct smap *nb_options, const char *route_table) ++add_to_routes_ad(struct hmap *routes_ad, const struct in6_addr prefix, ++ unsigned int plen, const struct in6_addr nexthop, ++ const char *origin, const char *route_table, ++ const struct nbrec_logical_router_port *nb_lrp, ++ const struct nbrec_logical_router_static_route *nb_route) + { +- if (strcmp(route_table, nb_route->route_table)) { +- if (VLOG_IS_DBG_ENABLED()) { +- VLOG_DBG("Skip advertising route %s -> %s as its route table %s !=" +- " %s of TS port", nb_route->ip_prefix, nb_route->nexthop, +- nb_route->route_table, route_table); +- } +- return; ++ if (route_table == NULL) { ++ route_table = ""; ++ } ++ ++ uint hash = ic_route_hash(&prefix, plen, &nexthop, origin, route_table); ++ ++ if (!ic_route_find(routes_ad, &prefix, plen, &nexthop, origin, route_table, ++ hash)) { ++ struct ic_route_info *ic_route = xzalloc(sizeof *ic_route); ++ ic_route->prefix = prefix; ++ ic_route->plen = plen; ++ ic_route->nexthop = nexthop; ++ ic_route->nb_route = nb_route; ++ ic_route->origin = origin; ++ ic_route->route_table = route_table; ++ ic_route->nb_lrp = nb_lrp; ++ hmap_insert(routes_ad, &ic_route->node, hash); ++ } else { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "Duplicate route advertisement was suppressed! NB " ++ "route uuid: "UUID_FMT, ++ UUID_ARGS(&nb_route->header_.uuid)); + } ++} + ++static void ++add_static_to_routes_ad( ++ struct hmap *routes_ad, ++ const struct nbrec_logical_router_static_route *nb_route, ++ const struct lport_addresses *nexthop_addresses, ++ const struct smap *nb_options) ++{ + struct in6_addr prefix, nexthop; + unsigned int plen; + if (!parse_route(nb_route->ip_prefix, nb_route->nexthop, +@@ -1142,16 +1170,8 @@ add_to_routes_ad(struct hmap *routes_ad, + ds_destroy(&msg); + } + +- struct ic_route_info *ic_route = xzalloc(sizeof *ic_route); +- ic_route->prefix = prefix; +- ic_route->plen = plen; +- ic_route->nexthop = nexthop; +- ic_route->nb_route = nb_route; +- ic_route->origin = ROUTE_ORIGIN_STATIC; +- ic_route->route_table = nb_route->route_table; +- hmap_insert(routes_ad, &ic_route->node, +- ic_route_hash(&prefix, plen, &nexthop, ROUTE_ORIGIN_STATIC, +- nb_route->route_table)); ++ add_to_routes_ad(routes_ad, prefix, plen, nexthop, ROUTE_ORIGIN_STATIC, ++ nb_route->route_table, NULL, nb_route); + } + + static void +@@ -1195,18 +1215,9 @@ add_network_to_routes_ad(struct hmap *routes_ad, const char *network, + ds_destroy(&msg); + } + +- struct ic_route_info *ic_route = xzalloc(sizeof *ic_route); +- ic_route->prefix = prefix; +- ic_route->plen = plen; +- ic_route->nexthop = nexthop; +- ic_route->nb_lrp = nb_lrp; +- ic_route->origin = ROUTE_ORIGIN_CONNECTED; +- + /* directly-connected routes go to
route table */ +- ic_route->route_table = NULL; +- hmap_insert(routes_ad, &ic_route->node, +- ic_route_hash(&prefix, plen, &nexthop, +- ROUTE_ORIGIN_CONNECTED, "")); ++ add_to_routes_ad(routes_ad, prefix, plen, nexthop, ROUTE_ORIGIN_CONNECTED, ++ NULL, nb_lrp, NULL); + } + + static bool +@@ -1366,7 +1377,7 @@ sync_learned_routes(struct ic_context *ctx, + struct ic_route_info *route_learned + = ic_route_find(&ic_lr->routes_learned, &prefix, plen, + &nexthop, isb_route->origin, +- isb_route->route_table); ++ isb_route->route_table, 0); + if (route_learned) { + /* Sync external-ids */ + struct uuid ext_id; +@@ -1465,7 +1476,7 @@ advertise_routes(struct ic_context *ctx, + } + struct ic_route_info *route_adv = + ic_route_find(routes_ad, &prefix, plen, &nexthop, +- isb_route->origin, isb_route->route_table); ++ isb_route->origin, isb_route->route_table, 0); + if (!route_adv) { + /* Delete the extra route from IC-SB. */ + VLOG_DBG("Delete route %s -> %s from IC-SB, which is not found" +@@ -1545,10 +1556,10 @@ build_ts_routes_to_adv(struct ic_context *ctx, + nbrec_logical_router_update_static_routes_delvalue(lr, + nb_route); + } +- } else { ++ } else if (!strcmp(ts_route_table, nb_route->route_table)) { + /* It may be a route to be advertised */ +- add_to_routes_ad(routes_ad, nb_route, ts_port_addrs, +- &nb_global->options, ts_route_table); ++ add_static_to_routes_ad(routes_ad, nb_route, ts_port_addrs, ++ &nb_global->options); + } + } + +@@ -1581,7 +1592,6 @@ advertise_lr_routes(struct ic_context *ctx, + const struct icsbrec_port_binding *isb_pb; + const char *lrp_name, *ts_name, *route_table; + struct lport_addresses ts_port_addrs; +- const struct nbrec_logical_router *lr = ic_lr->lr; + const struct icnbrec_transit_switch *key; + + struct hmap routes_ad = HMAP_INITIALIZER(&routes_ad); +@@ -1599,7 +1609,7 @@ advertise_lr_routes(struct ic_context *ctx, + VLOG_INFO_RL(&rl, "Route sync ignores port %s on ts %s for router" + " %s because the addresses are invalid.", + isb_pb->logical_port, isb_pb->transit_switch, +- lr->name); ++ ic_lr->lr->name); + continue; + } + lrp_name = get_lrp_name_by_ts_port_name(ctx, isb_pb->logical_port); +@@ -1612,6 +1622,39 @@ advertise_lr_routes(struct ic_context *ctx, + hmap_destroy(&routes_ad); + } + ++static void ++delete_orphan_ic_routes(struct ic_context *ctx, ++ const struct icsbrec_availability_zone *az) ++{ ++ const struct icsbrec_route *isb_route, *isb_route_key = ++ icsbrec_route_index_init_row(ctx->icsbrec_route_by_az); ++ icsbrec_route_index_set_availability_zone(isb_route_key, az); ++ ++ const struct icnbrec_transit_switch *t_sw, *t_sw_key; ++ ++ ICSBREC_ROUTE_FOR_EACH_EQUAL (isb_route, isb_route_key, ++ ctx->icsbrec_route_by_az) ++ { ++ t_sw_key = icnbrec_transit_switch_index_init_row( ++ ctx->icnbrec_transit_switch_by_name); ++ icnbrec_transit_switch_index_set_name(t_sw_key, ++ isb_route->transit_switch); ++ t_sw = icnbrec_transit_switch_index_find( ++ ctx->icnbrec_transit_switch_by_name, t_sw_key); ++ icnbrec_transit_switch_index_destroy_row(t_sw_key); ++ ++ if (!t_sw) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_INFO_RL(&rl, "Deleting orphan ICDB:Route: %s->%s (%s, rtb:%s," ++ " transit switch: %s)", isb_route->ip_prefix, ++ isb_route->nexthop, isb_route->origin, ++ isb_route->route_table, isb_route->transit_switch); ++ icsbrec_route_delete(isb_route); ++ } ++ } ++ icsbrec_route_index_destroy_row(isb_route_key); ++} ++ + static void + route_run(struct ic_context *ctx, + const struct icsbrec_availability_zone *az) +@@ -1620,6 +1663,8 @@ route_run(struct ic_context *ctx, + return; + } + ++ delete_orphan_ic_routes(ctx, az); ++ + struct hmap ic_lrs = HMAP_INITIALIZER(&ic_lrs); + const struct icsbrec_port_binding *isb_pb; + const struct icsbrec_port_binding *isb_pb_key = +@@ -1866,13 +1911,112 @@ main(int argc, char *argv[]) + struct ovsdb_idl_loop ovnisb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( + ovsdb_idl_create(ovn_ic_sb_db, &icsbrec_idl_class, true, true)); + +- /* ovn-nb db. XXX: add only needed tables and columns */ ++ /* ovn-nb db. */ + struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( +- ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true)); +- +- /* ovn-sb db. XXX: add only needed tables and columns */ ++ ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, false, true)); ++ ++ ovsdb_idl_add_table(ovnnb_idl_loop.idl, &nbrec_table_nb_global); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, &nbrec_nb_global_col_name); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, &nbrec_nb_global_col_options); ++ ++ ovsdb_idl_add_table(ovnnb_idl_loop.idl, ++ &nbrec_table_logical_router_static_route); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_static_route_col_route_table); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_static_route_col_ip_prefix); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_static_route_col_nexthop); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_static_route_col_external_ids); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_static_route_col_options); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_static_route_col_policy); ++ ++ ovsdb_idl_add_table(ovnnb_idl_loop.idl, &nbrec_table_logical_router); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_col_name); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_col_static_routes); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_col_ports); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_col_options); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_col_external_ids); ++ ++ ovsdb_idl_add_table(ovnnb_idl_loop.idl, &nbrec_table_logical_router_port); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_port_col_name); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_port_col_networks); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_port_col_external_ids); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_router_port_col_options); ++ ++ ovsdb_idl_add_table(ovnnb_idl_loop.idl, &nbrec_table_logical_switch); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_col_name); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_col_ports); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_col_other_config); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_col_external_ids); ++ ++ ovsdb_idl_add_table(ovnnb_idl_loop.idl, &nbrec_table_logical_switch_port); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_name); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_addresses); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_options); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_type); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_up); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_addresses); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_enabled); ++ ovsdb_idl_add_column(ovnnb_idl_loop.idl, ++ &nbrec_logical_switch_port_col_external_ids); ++ ++ /* ovn-sb db. */ + struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( +- ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, true, true)); ++ ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true)); ++ ++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_encaps); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_hostname); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_other_config); ++ ++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_encap); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_encap_col_chassis_name); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_encap_col_type); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_encap_col_ip); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_encap_col_options); ++ ++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_datapath_binding_col_external_ids); ++ ++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_datapath); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_mac); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_options); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_logical_port); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_external_ids); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_chassis); + + /* Create IDL indexes */ + struct ovsdb_idl_index *nbrec_ls_by_name +@@ -1908,6 +2052,10 @@ main(int argc, char *argv[]) + &icsbrec_port_binding_col_transit_switch, + &icsbrec_port_binding_col_availability_zone); + ++ struct ovsdb_idl_index *icsbrec_route_by_az ++ = ovsdb_idl_index_create1(ovnisb_idl_loop.idl, ++ &icsbrec_route_col_availability_zone); ++ + struct ovsdb_idl_index *icsbrec_route_by_ts + = ovsdb_idl_index_create1(ovnisb_idl_loop.idl, + &icsbrec_route_col_transit_switch); +@@ -1962,6 +2110,7 @@ main(int argc, char *argv[]) + .icsbrec_port_binding_by_az = icsbrec_port_binding_by_az, + .icsbrec_port_binding_by_ts = icsbrec_port_binding_by_ts, + .icsbrec_port_binding_by_ts_az = icsbrec_port_binding_by_ts_az, ++ .icsbrec_route_by_az = icsbrec_route_by_az, + .icsbrec_route_by_ts = icsbrec_route_by_ts, + .icsbrec_route_by_ts_az = icsbrec_route_by_ts_az, + }; diff --git a/include/ovn/actions.h b/include/ovn/actions.h index d7ee84dac..fdb6ab08b 100644 --- a/include/ovn/actions.h @@ -1174,7 +1891,7 @@ index 9b902f005..241872681 100644 struct sset ips_v4; struct sset ips_v6; diff --git a/northd/northd.c b/northd/northd.c -index 84440a47f..5dd44a220 100644 +index 84440a47f..404c40b8c 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -121,20 +121,22 @@ enum ovn_stage { @@ -1269,16 +1986,15 @@ index 84440a47f..5dd44a220 100644 /* Indicate that this packet has been recirculated using egress * loopback. This allows certain checks to be bypassed, such as a * logical router dropping packets with source IP address equals -@@ -228,6 +241,8 @@ enum ovn_stage { +@@ -228,6 +241,7 @@ enum ovn_stage { #define REGBIT_LOOKUP_NEIGHBOR_IP_RESULT "reg9[3]" #define REGBIT_DST_NAT_IP_LOCAL "reg9[4]" #define REGBIT_KNOWN_ECMP_NH "reg9[5]" +#define REGBIT_KNOWN_LB_SESSION "reg9[6]" -+#define REG /* Register to store the eth address associated to a router port for packets * received in S_ROUTER_IN_ADMISSION. -@@ -245,6 +260,10 @@ enum ovn_stage { +@@ -245,6 +259,10 @@ enum ovn_stage { #define REG_SRC_IPV6 "xxreg1" #define REG_ROUTE_TABLE_ID "reg7" @@ -1289,7 +2005,7 @@ index 84440a47f..5dd44a220 100644 #define REG_ORIG_TP_DPORT_ROUTER "reg9[16..31]" /* Register used for setting a label for ACLs in a Logical Switch. */ -@@ -267,73 +286,75 @@ enum ovn_stage { +@@ -267,73 +285,75 @@ enum ovn_stage { * OVS register usage: * * Logical Switch pipeline: @@ -1427,7 +2143,7 @@ index 84440a47f..5dd44a220 100644 * */ -@@ -1040,7 +1061,16 @@ init_mcast_info_for_switch_datapath(struct ovn_datapath *od) +@@ -1040,7 +1060,16 @@ init_mcast_info_for_switch_datapath(struct ovn_datapath *od) mcast_sw_info->query_max_response = smap_get_ullong(&od->nbs->other_config, "mcast_query_max_response", OVN_MCAST_DEFAULT_QUERY_MAX_RESPONSE_S); @@ -1444,106 +2160,151 @@ index 84440a47f..5dd44a220 100644 mcast_sw_info->active_v4_flows = ATOMIC_VAR_INIT(0); mcast_sw_info->active_v6_flows = ATOMIC_VAR_INIT(0); } -@@ -6936,6 +6966,225 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, +@@ -6936,6 +6965,426 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, } } -+/* Builds the logical flows related to load balancer affinity in: -+ * - Ingress Table 11: Load balancing affinity check -+ * - Ingress Table 12: LB -+ * - Ingress Table 13: Load balancing affinity learn ++/* Builds the logical router flows related to load balancer affinity. ++ * For a LB configured with 'vip=V:VP' and backends 'B1:BP1,B2:BP2' and ++ * affinity timeout set to T, it generates the following logical flows: ++ * - load balancing affinity check: ++ * table=lr_in_lb_aff_check, priority=100 ++ * match=(new_lb_match) ++ * action=(REGBIT_KNOWN_LB_SESSION = chk_lb_aff(); next;) ++ * ++ * - load balancing: ++ * table=lr_in_dnat, priority=150 ++ * match=(REGBIT_KNOWN_LB_SESSION == 1 && ct.new && ip4 ++ * && REG_LB_AFF_BACKEND_IP4 == B1 && REG_LB_AFF_MATCH_PORT == BP1) ++ * action=(REG_NEXT_HOP_IPV4 = V; lb_action; ++ * ct_lb_mark(backends=B1:BP1);) ++ * table=lr_in_dnat, priority=150 ++ * match=(REGBIT_KNOWN_LB_SESSION == 1 && ct.new && ip4 ++ * && REG_LB_AFF_BACKEND_IP4 == B2 && REG_LB_AFF_MATCH_PORT == BP2) ++ * action=(REG_NEXT_HOP_IPV4 = V; lb_action; ++ * ct_lb_mark(backends=B2:BP2);) ++ * ++ * - load balancing affinity learn: ++ * table=lr_in_lb_aff_learn, priority=100 ++ * match=(REGBIT_KNOWN_LB_SESSION == 0 ++ * && ct.new && ip4 ++ * && REG_NEXT_HOP_IPV4 == V && REG_ORIG_TP_DPORT_ROUTER = VP ++ * && ip4.dst == B1 && tcp.dst == BP1) ++ * action=(commit_lb_aff(vip = "V:VP", backend = "B1:BP1", ++ * proto = tcp, timeout = T)); ++ * table=lr_in_lb_aff_learn, priority=100 ++ * match=(REGBIT_KNOWN_LB_SESSION == 0 ++ * && ct.new && ip4 ++ * && REG_NEXT_HOP_IPV4 == V && REG_ORIG_TP_DPORT_ROUTER = VP ++ * && ip4.dst == B2 && tcp.dst == BP2) ++ * action=(commit_lb_aff(vip = "V:VP", backend = "B2:BP2", ++ * proto = tcp, timeout = T)); ++ * + */ +static void -+build_lb_affinity_flows(struct hmap *lflows, struct ovn_northd_lb *lb, -+ struct ovn_lb_vip *lb_vip, char *check_lb_match, -+ char *lb_action, struct ovn_datapath **dplist, -+ int n_dplist, bool router_pipeline) ++build_lb_affinity_lr_flows(struct hmap *lflows, struct ovn_northd_lb *lb, ++ struct ovn_lb_vip *lb_vip, char *new_lb_match, ++ char *lb_action, struct ovn_datapath **dplist, ++ int n_dplist) +{ + if (!lb->affinity_timeout) { + return; + } + -+ enum ovn_stage stage0 = router_pipeline ? -+ S_ROUTER_IN_LB_AFF_CHECK : S_SWITCH_IN_LB_AFF_CHECK; ++ static char *aff_check = REGBIT_KNOWN_LB_SESSION" = chk_lb_aff(); next;"; + struct ovn_lflow *lflow_ref_aff_check = NULL; + /* Check if we have already a enstablished connection for this + * tuple and we are in affinity timeslot. */ + uint32_t hash_aff_check = ovn_logical_flow_hash( -+ ovn_stage_get_table(stage0), ovn_stage_get_pipeline(stage0), 100, -+ check_lb_match, REGBIT_KNOWN_LB_SESSION" = chk_lb_aff(); next;"); ++ ovn_stage_get_table(S_ROUTER_IN_LB_AFF_CHECK), ++ ovn_stage_get_pipeline(S_ROUTER_IN_LB_AFF_CHECK), 100, ++ new_lb_match, aff_check); + + for (size_t i = 0; i < n_dplist; i++) { + if (!ovn_dp_group_add_with_reference(lflow_ref_aff_check, dplist[i])) { + lflow_ref_aff_check = ovn_lflow_add_at_with_hash( -+ lflows, dplist[i], stage0, 100, check_lb_match, -+ REGBIT_KNOWN_LB_SESSION" = chk_lb_aff(); next;", -+ NULL, NULL, &lb->nlb->header_, ++ lflows, dplist[i], S_ROUTER_IN_LB_AFF_CHECK, 100, ++ new_lb_match, aff_check, NULL, NULL, &lb->nlb->header_, + OVS_SOURCE_LOCATOR, hash_aff_check); + } + } + ++ struct ds aff_action = DS_EMPTY_INITIALIZER; + struct ds aff_action_learn = DS_EMPTY_INITIALIZER; -+ struct ds aff_match_learn = DS_EMPTY_INITIALIZER; -+ struct ds aff_action_lb_common = DS_EMPTY_INITIALIZER; -+ struct ds aff_action_lb = DS_EMPTY_INITIALIZER; + struct ds aff_match = DS_EMPTY_INITIALIZER; ++ struct ds aff_match_learn = DS_EMPTY_INITIALIZER; + + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_vip->vip); -+ const char *reg_vip; -+ if (router_pipeline) { -+ reg_vip = ipv6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4; -+ } else { -+ reg_vip = ipv6 ? REG_ORIG_DIP_IPV6 : REG_ORIG_DIP_IPV4; -+ } ++ const char *ip_match = ipv6 ? "ip6" : "ip4"; ++ ++ const char *reg_vip = ipv6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4; ++ const char *reg_backend = ++ ipv6 ? REG_LB_L3_AFF_BACKEND_IP6 : REG_LB_AFF_BACKEND_IP4; ++ ++ /* Prepare common part of affinity LB and affinity learn action. */ ++ ds_put_format(&aff_action, "%s = %s; ", reg_vip, lb_vip->vip_str); ++ ds_put_cstr(&aff_action_learn, "commit_lb_aff(vip = \""); + -+ ds_put_format(&aff_action_lb_common, -+ REGBIT_CONNTRACK_COMMIT" = 0; %s = %s; ", -+ reg_vip, lb_vip->vip_str); + if (lb_vip->vip_port) { -+ ds_put_format(&aff_action_lb_common, REG_ORIG_TP_DPORT" = %d; ", -+ lb_vip->vip_port); ++ ds_put_format(&aff_action_learn, ipv6 ? "[%s]:%"PRIu16 : "%s:%"PRIu16, ++ lb_vip->vip_str, lb_vip->vip_port); ++ } else { ++ ds_put_cstr(&aff_action_learn, lb_vip->vip_str); + } + + if (lb_action) { -+ ds_put_format(&aff_action_lb_common, "%s;", lb_action); ++ ds_put_cstr(&aff_action, lb_action); + } ++ ds_put_cstr(&aff_action, "ct_lb_mark(backends="); ++ ds_put_cstr(&aff_action_learn, "\", backend = \""); ++ ++ /* Prepare common part of affinity learn match. */ ++ if (lb_vip->vip_port) { ++ ds_put_format(&aff_match_learn, REGBIT_KNOWN_LB_SESSION" == 0 && " ++ "ct.new && %s && %s == %s && " ++ REG_ORIG_TP_DPORT_ROUTER" == %"PRIu16" && " ++ "%s.dst == ", ip_match, reg_vip, lb_vip->vip_str, ++ lb_vip->vip_port, ip_match); ++ } else { ++ ds_put_format(&aff_match_learn, REGBIT_KNOWN_LB_SESSION" == 0 && " ++ "ct.new && %s && %s == %s && %s.dst == ", ip_match, ++ reg_vip, lb_vip->vip_str, ip_match); ++ } ++ ++ /* Prepare common part of affinity match. */ ++ ds_put_format(&aff_match, REGBIT_KNOWN_LB_SESSION" == 1 && " ++ "ct.new && %s && %s == ", ip_match, reg_backend); ++ ++ /* Store the common part length. */ ++ size_t aff_action_len = aff_action.length; ++ size_t aff_action_learn_len = aff_action_learn.length; ++ size_t aff_match_len = aff_match.length; ++ size_t aff_match_learn_len = aff_match_learn.length; ++ + -+ stage0 = router_pipeline -+ ? S_ROUTER_IN_LB_AFF_LEARN : S_SWITCH_IN_LB_AFF_LEARN; -+ enum ovn_stage stage1 = router_pipeline -+ ? S_ROUTER_IN_DNAT : S_SWITCH_IN_LB; + for (size_t i = 0; i < lb_vip->n_backends; i++) { + struct ovn_lb_backend *backend = &lb_vip->backends[i]; + -+ /* Forward to OFTABLE_CHK_LB_AFFINITY table to store flow tuple. */ -+ ds_put_format(&aff_match_learn, -+ REGBIT_KNOWN_LB_SESSION" == 0 && " -+ "ct.new && %s && %s.dst == %s && %s == %s", -+ ipv6 ? "ip6" : "ip4", ipv6 ? "ip6" : "ip4", -+ backend->ip_str, reg_vip, lb_vip->vip_str); ++ ds_put_cstr(&aff_match_learn, backend->ip_str); ++ ds_put_cstr(&aff_match, backend->ip_str); ++ + if (backend->port) { ++ ds_put_format(&aff_action, ipv6 ? "[%s]:%d" : "%s:%d", ++ backend->ip_str, backend->port); ++ ds_put_format(&aff_action_learn, ipv6 ? "[%s]:%d" : "%s:%d", ++ backend->ip_str, backend->port); ++ + ds_put_format(&aff_match_learn, " && %s.dst == %d", + lb->proto, backend->port); -+ } -+ -+ if (lb_vip->vip_port) { -+ ds_put_format(&aff_action_learn, -+ "commit_lb_aff(vip = \"%s%s%s:%d\"", -+ ipv6 ? "[" : "", lb_vip->vip_str, ipv6 ? "]" : "", -+ lb_vip->vip_port); ++ ds_put_format(&aff_match, " && "REG_LB_AFF_MATCH_PORT" == %d", ++ backend->port); + } else { -+ ds_put_format(&aff_action_learn, "commit_lb_aff(vip = \"%s\"", -+ lb_vip->vip_str); ++ ds_put_cstr(&aff_action, backend->ip_str); ++ ds_put_cstr(&aff_action_learn, backend->ip_str); + } + -+ if (backend->port) { -+ ds_put_format(&aff_action_learn,", backend = \"%s%s%s:%d\"", -+ ipv6 ? "[" : "", backend->ip_str, -+ ipv6 ? "]" : "", backend->port); -+ } else { -+ ds_put_format(&aff_action_learn,", backend = \"%s\"", -+ backend->ip_str); -+ } ++ ds_put_cstr(&aff_action, ");"); ++ ds_put_char(&aff_action_learn, '"'); + + if (lb_vip->vip_port) { + ds_put_format(&aff_action_learn, ", proto = %s", lb->proto); @@ -1554,104 +2315,260 @@ index 84440a47f..5dd44a220 100644 + + struct ovn_lflow *lflow_ref_aff_learn = NULL; + uint32_t hash_aff_learn = ovn_logical_flow_hash( -+ ovn_stage_get_table(stage0), ovn_stage_get_pipeline(stage0), ++ ovn_stage_get_table(S_ROUTER_IN_LB_AFF_LEARN), ++ ovn_stage_get_pipeline(S_ROUTER_IN_LB_AFF_LEARN), + 100, ds_cstr(&aff_match_learn), ds_cstr(&aff_action_learn)); + -+ const char *reg_backend; -+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { -+ reg_backend = REG_LB_AFF_BACKEND_IP4; -+ } else { -+ reg_backend = router_pipeline -+ ? REG_LB_L3_AFF_BACKEND_IP6 : REG_LB_L2_AFF_BACKEND_IP6; -+ } -+ -+ /* Use already selected backend within affinity -+ * timeslot. */ -+ if (backend->port) { -+ ds_put_format(&aff_match, -+ REGBIT_KNOWN_LB_SESSION" == 1 && %s && %s == %s " -+ "&& "REG_LB_AFF_MATCH_PORT" == %d", -+ IN6_IS_ADDR_V4MAPPED(&lb_vip->vip) ? "ip4" : "ip6", -+ reg_backend, backend->ip_str, backend->port); -+ ds_put_format(&aff_action_lb, "%s ct_lb_mark(backends=%s%s%s:%d);", -+ ds_cstr(&aff_action_lb_common), -+ ipv6 ? "[" : "", backend->ip_str, -+ ipv6 ? "]" : "", backend->port); -+ } else { -+ ds_put_format(&aff_match, -+ REGBIT_KNOWN_LB_SESSION" == 1 && %s && %s == %s", -+ IN6_IS_ADDR_V4MAPPED(&lb_vip->vip) ? "ip4" : "ip6", -+ reg_backend, backend->ip_str); -+ ds_put_format(&aff_action_lb, "%s ct_lb_mark(backends=%s);", -+ ds_cstr(&aff_action_lb_common), backend->ip_str); -+ } -+ + struct ovn_lflow *lflow_ref_aff_lb = NULL; + uint32_t hash_aff_lb = ovn_logical_flow_hash( -+ ovn_stage_get_table(stage1), ovn_stage_get_pipeline(stage1), -+ 150, ds_cstr(&aff_match), ds_cstr(&aff_action_lb)); ++ ovn_stage_get_table(S_ROUTER_IN_DNAT), ++ ovn_stage_get_pipeline(S_ROUTER_IN_DNAT), ++ 150, ds_cstr(&aff_match), ds_cstr(&aff_action)); + + for (size_t j = 0; j < n_dplist; j++) { ++ /* Forward to OFTABLE_CHK_LB_AFFINITY table to store flow tuple. */ + if (!ovn_dp_group_add_with_reference(lflow_ref_aff_learn, + dplist[j])) { + lflow_ref_aff_learn = ovn_lflow_add_at_with_hash( -+ lflows, dplist[j], stage0, 100, -+ ds_cstr(&aff_match_learn), ds_cstr(&aff_action_learn), -+ NULL, NULL, &lb->nlb->header_, OVS_SOURCE_LOCATOR, -+ hash_aff_learn); -+ } -+ if (!ovn_dp_group_add_with_reference(lflow_ref_aff_learn, -+ dplist[j])) { -+ lflow_ref_aff_learn = ovn_lflow_add_at_with_hash( -+ lflows, dplist[j], stage0, 100, ++ lflows, dplist[j], S_ROUTER_IN_LB_AFF_LEARN, 100, + ds_cstr(&aff_match_learn), ds_cstr(&aff_action_learn), + NULL, NULL, &lb->nlb->header_, OVS_SOURCE_LOCATOR, + hash_aff_learn); + } ++ /* Use already selected backend within affinity timeslot. */ + if (!ovn_dp_group_add_with_reference(lflow_ref_aff_lb, + dplist[j])) { + lflow_ref_aff_lb = ovn_lflow_add_at_with_hash( -+ lflows, dplist[j], stage1, 150, ds_cstr(&aff_match), -+ ds_cstr(&aff_action_lb), NULL, NULL, -+ &lb->nlb->header_, OVS_SOURCE_LOCATOR, -+ hash_aff_lb); ++ lflows, dplist[j], S_ROUTER_IN_DNAT, 150, ++ ds_cstr(&aff_match), ds_cstr(&aff_action), NULL, NULL, ++ &lb->nlb->header_, OVS_SOURCE_LOCATOR, ++ hash_aff_lb); + } + } + -+ ds_clear(&aff_action_learn); -+ ds_clear(&aff_match_learn); -+ ds_clear(&aff_action_lb); -+ ds_clear(&aff_match); ++ ds_truncate(&aff_action, aff_action_len); ++ ds_truncate(&aff_action_learn, aff_action_learn_len); ++ ds_truncate(&aff_match, aff_match_len); ++ ds_truncate(&aff_match_learn, aff_match_learn_len); + } + ++ ds_destroy(&aff_action); + ds_destroy(&aff_action_learn); -+ ds_destroy(&aff_match_learn); -+ ds_destroy(&aff_action_lb_common); -+ ds_destroy(&aff_action_lb); + ds_destroy(&aff_match); ++ ds_destroy(&aff_match_learn); +} + ++/* Builds the logical switch flows related to load balancer affinity. ++ * For a LB configured with 'vip=V:VP' and backends 'B1:BP1,B2:BP2' and ++ * affinity timeout set to T, it generates the following logical flows: ++ * - load balancing affinity check: ++ * table=ls_in_lb_aff_check, priority=100 ++ * match=(ct.new && ip4 ++ * && REG_ORIG_DIP_IPV4 == V && REG_ORIG_TP_DPORT == VP) ++ * action=(REGBIT_KNOWN_LB_SESSION = chk_lb_aff(); next;) ++ * ++ * - load balancing: ++ * table=ls_in_lb, priority=150 ++ * match=(REGBIT_KNOWN_LB_SESSION == 1 && ct.new && ip4 ++ * && REG_LB_AFF_BACKEND_IP4 == B1 && REG_LB_AFF_MATCH_PORT == BP1) ++ * action=(REGBIT_CONNTRACK_COMMIT = 0; ++ * REG_ORIG_DIP_IPV4 = V; REG_ORIG_TP_DPORT = VP; ++ * ct_lb_mark(backends=B1:BP1);) ++ * table=ls_in_lb, priority=150 ++ * match=(REGBIT_KNOWN_LB_SESSION == 1 && ct.new && ip4 ++ * && REG_LB_AFF_BACKEND_IP4 == B2 && REG_LB_AFF_MATCH_PORT == BP2) ++ * action=(REGBIT_CONNTRACK_COMMIT = 0; ++ * REG_ORIG_DIP_IPV4 = V; ++ * REG_ORIG_TP_DPORT = VP; ++ * ct_lb_mark(backends=B1:BP2);) ++ * ++ * - load balancing affinity learn: ++ * table=ls_in_lb_aff_learn, priority=100 ++ * match=(REGBIT_KNOWN_LB_SESSION == 0 ++ * && ct.new && ip4 ++ * && REG_ORIG_DIP_IPV4 == V && REG_ORIG_TP_DPORT == VP ++ * && ip4.dst == B1 && tcp.dst == BP1) ++ * action=(commit_lb_aff(vip = "V:VP", backend = "B1:BP1", ++ * proto = tcp, timeout = T)); ++ * table=ls_in_lb_aff_learn, priority=100 ++ * match=(REGBIT_KNOWN_LB_SESSION == 0 ++ * && ct.new && ip4 ++ * && REG_ORIG_DIP_IPV4 == V && REG_ORIG_TP_DPORT == VP ++ * && ip4.dst == B2 && tcp.dst == BP2) ++ * action=(commit_lb_aff(vip = "V:VP", backend = "B2:BP2", ++ * proto = tcp, timeout = T)); ++ * ++ */ +static void +build_lb_affinity_ls_flows(struct hmap *lflows, struct ovn_northd_lb *lb, + struct ovn_lb_vip *lb_vip) +{ -+ struct ds match = DS_EMPTY_INITIALIZER; ++ if (!lb->affinity_timeout) { ++ return; ++ } ++ ++ struct ds new_lb_match = DS_EMPTY_INITIALIZER; + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { -+ ds_put_format(&match, "ct.new && ip4 && "REG_ORIG_DIP_IPV4 " == %s", ++ ds_put_format(&new_lb_match, ++ "ct.new && ip4 && "REG_ORIG_DIP_IPV4 " == %s", + lb_vip->vip_str); + } else { -+ ds_put_format(&match, "ct.new && ip6 && "REG_ORIG_DIP_IPV6 " == %s", ++ ds_put_format(&new_lb_match, ++ "ct.new && ip6 && "REG_ORIG_DIP_IPV6 " == %s", + lb_vip->vip_str); + } + + if (lb_vip->vip_port) { -+ ds_put_format(&match, " && "REG_ORIG_TP_DPORT " == %"PRIu16, ++ ds_put_format(&new_lb_match, " && "REG_ORIG_TP_DPORT " == %"PRIu16, ++ lb_vip->vip_port); ++ } ++ ++ static char *aff_check = REGBIT_KNOWN_LB_SESSION" = chk_lb_aff(); next;"; ++ struct ovn_lflow *lflow_ref_aff_check = NULL; ++ /* Check if we have already a enstablished connection for this ++ * tuple and we are in affinity timeslot. */ ++ uint32_t hash_aff_check = ovn_logical_flow_hash( ++ ovn_stage_get_table(S_SWITCH_IN_LB_AFF_CHECK), ++ ovn_stage_get_pipeline(S_SWITCH_IN_LB_AFF_CHECK), 100, ++ ds_cstr(&new_lb_match), aff_check); ++ ++ for (size_t i = 0; i < lb->n_nb_ls; i++) { ++ if (!ovn_dp_group_add_with_reference(lflow_ref_aff_check, ++ lb->nb_ls[i])) { ++ lflow_ref_aff_check = ovn_lflow_add_at_with_hash( ++ lflows, lb->nb_ls[i], S_SWITCH_IN_LB_AFF_CHECK, 100, ++ ds_cstr(&new_lb_match), aff_check, NULL, NULL, ++ &lb->nlb->header_, OVS_SOURCE_LOCATOR, hash_aff_check); ++ } ++ } ++ ds_destroy(&new_lb_match); ++ ++ struct ds aff_action = DS_EMPTY_INITIALIZER; ++ struct ds aff_action_learn = DS_EMPTY_INITIALIZER; ++ struct ds aff_match = DS_EMPTY_INITIALIZER; ++ struct ds aff_match_learn = DS_EMPTY_INITIALIZER; ++ ++ bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_vip->vip); ++ const char *ip_match = ipv6 ? "ip6" : "ip4"; ++ ++ const char *reg_vip = ipv6 ? REG_ORIG_DIP_IPV6 : REG_ORIG_DIP_IPV4; ++ const char *reg_backend = ++ ipv6 ? REG_LB_L2_AFF_BACKEND_IP6 : REG_LB_AFF_BACKEND_IP4; ++ ++ /* Prepare common part of affinity LB and affinity learn action. */ ++ ds_put_format(&aff_action, REGBIT_CONNTRACK_COMMIT" = 0; %s = %s; ", ++ reg_vip, lb_vip->vip_str); ++ ds_put_cstr(&aff_action_learn, "commit_lb_aff(vip = \""); ++ ++ if (lb_vip->vip_port) { ++ ds_put_format(&aff_action, REG_ORIG_TP_DPORT" = %"PRIu16"; ", + lb_vip->vip_port); ++ ds_put_format(&aff_action_learn, ipv6 ? "[%s]:%"PRIu16 : "%s:%"PRIu16, ++ lb_vip->vip_str, lb_vip->vip_port); ++ } else { ++ ds_put_cstr(&aff_action_learn, lb_vip->vip_str); ++ } ++ ++ ds_put_cstr(&aff_action, "ct_lb_mark(backends="); ++ ds_put_cstr(&aff_action_learn, "\", backend = \""); ++ ++ /* Prepare common part of affinity learn match. */ ++ if (lb_vip->vip_port) { ++ ds_put_format(&aff_match_learn, REGBIT_KNOWN_LB_SESSION" == 0 && " ++ "ct.new && %s && %s == %s && " ++ REG_ORIG_TP_DPORT" == %"PRIu16" && %s.dst == ", ++ ip_match, reg_vip, lb_vip->vip_str, ++ lb_vip->vip_port, ip_match); ++ } else { ++ ds_put_format(&aff_match_learn, REGBIT_KNOWN_LB_SESSION" == 0 && " ++ "ct.new && %s && %s == %s && %s.dst == ", ++ ip_match, reg_vip, lb_vip->vip_str, ip_match); ++ } ++ ++ /* Prepare common part of affinity match. */ ++ ds_put_format(&aff_match, REGBIT_KNOWN_LB_SESSION" == 1 && " ++ "ct.new && %s && %s == ", ip_match, reg_backend); ++ ++ /* Store the common part length. */ ++ size_t aff_action_len = aff_action.length; ++ size_t aff_action_learn_len = aff_action_learn.length; ++ size_t aff_match_len = aff_match.length; ++ size_t aff_match_learn_len = aff_match_learn.length; ++ ++ for (size_t i = 0; i < lb_vip->n_backends; i++) { ++ struct ovn_lb_backend *backend = &lb_vip->backends[i]; ++ ++ ds_put_cstr(&aff_match_learn, backend->ip_str); ++ ds_put_cstr(&aff_match, backend->ip_str); ++ ++ if (backend->port) { ++ ds_put_format(&aff_action, ipv6 ? "[%s]:%d" : "%s:%d", ++ backend->ip_str, backend->port); ++ ds_put_format(&aff_action_learn, ipv6 ? "[%s]:%d" : "%s:%d", ++ backend->ip_str, backend->port); ++ ++ ds_put_format(&aff_match_learn, " && %s.dst == %d", ++ lb->proto, backend->port); ++ ds_put_format(&aff_match, " && "REG_LB_AFF_MATCH_PORT" == %d", ++ backend->port); ++ } else { ++ ds_put_cstr(&aff_action, backend->ip_str); ++ ds_put_cstr(&aff_action_learn, backend->ip_str); ++ } ++ ++ ds_put_cstr(&aff_action, ");"); ++ ds_put_char(&aff_action_learn, '"'); ++ ++ if (lb_vip->vip_port) { ++ ds_put_format(&aff_action_learn, ", proto = %s", lb->proto); ++ } ++ ++ ds_put_format(&aff_action_learn, ", timeout = %d); /* drop */", ++ lb->affinity_timeout); ++ ++ struct ovn_lflow *lflow_ref_aff_learn = NULL; ++ uint32_t hash_aff_learn = ovn_logical_flow_hash( ++ ovn_stage_get_table(S_SWITCH_IN_LB_AFF_LEARN), ++ ovn_stage_get_pipeline(S_SWITCH_IN_LB_AFF_LEARN), ++ 100, ds_cstr(&aff_match_learn), ds_cstr(&aff_action_learn)); ++ ++ struct ovn_lflow *lflow_ref_aff_lb = NULL; ++ uint32_t hash_aff_lb = ovn_logical_flow_hash( ++ ovn_stage_get_table(S_SWITCH_IN_LB), ++ ovn_stage_get_pipeline(S_SWITCH_IN_LB), ++ 150, ds_cstr(&aff_match), ds_cstr(&aff_action)); ++ ++ for (size_t j = 0; j < lb->n_nb_ls; j++) { ++ /* Forward to OFTABLE_CHK_LB_AFFINITY table to store flow tuple. */ ++ if (!ovn_dp_group_add_with_reference(lflow_ref_aff_learn, ++ lb->nb_ls[j])) { ++ lflow_ref_aff_learn = ovn_lflow_add_at_with_hash( ++ lflows, lb->nb_ls[j], S_SWITCH_IN_LB_AFF_LEARN, 100, ++ ds_cstr(&aff_match_learn), ds_cstr(&aff_action_learn), ++ NULL, NULL, &lb->nlb->header_, OVS_SOURCE_LOCATOR, ++ hash_aff_learn); ++ } ++ /* Use already selected backend within affinity timeslot. */ ++ if (!ovn_dp_group_add_with_reference(lflow_ref_aff_lb, ++ lb->nb_ls[j])) { ++ lflow_ref_aff_lb = ovn_lflow_add_at_with_hash( ++ lflows, lb->nb_ls[j], S_SWITCH_IN_LB, 150, ++ ds_cstr(&aff_match), ds_cstr(&aff_action), NULL, NULL, ++ &lb->nlb->header_, OVS_SOURCE_LOCATOR, ++ hash_aff_lb); ++ } ++ } ++ ++ ds_truncate(&aff_action, aff_action_len); ++ ds_truncate(&aff_action_learn, aff_action_learn_len); ++ ds_truncate(&aff_match, aff_match_len); ++ ds_truncate(&aff_match_learn, aff_match_learn_len); + } + -+ build_lb_affinity_flows(lflows, lb, lb_vip, ds_cstr(&match), NULL, -+ lb->nb_ls, lb->n_nb_ls, false); -+ ds_destroy(&match); ++ ds_destroy(&aff_action); ++ ds_destroy(&aff_action_learn); ++ ds_destroy(&aff_match); ++ ds_destroy(&aff_match_learn); +} + +static void @@ -1670,7 +2587,7 @@ index 84440a47f..5dd44a220 100644 static void build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb, bool ct_lb_mark, struct ds *match, struct ds *action, -@@ -6985,6 +7234,8 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb, bool ct_lb_mark, +@@ -6985,6 +7434,8 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb, bool ct_lb_mark, priority = 120; } @@ -1679,7 +2596,7 @@ index 84440a47f..5dd44a220 100644 struct ovn_lflow *lflow_ref = NULL; uint32_t hash = ovn_logical_flow_hash( ovn_stage_get_table(S_SWITCH_IN_LB), -@@ -8451,6 +8702,10 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group, +@@ -8451,6 +8902,10 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group, if (atomic_compare_exchange_strong( &mcast_sw_info->active_v4_flows, &table_size, mcast_sw_info->table_size)) { @@ -1690,7 +2607,7 @@ index 84440a47f..5dd44a220 100644 return; } atomic_add(&mcast_sw_info->active_v4_flows, 1, &dummy); -@@ -10063,6 +10318,14 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, +@@ -10063,6 +10518,14 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, xcalloc(lb->n_nb_lr, sizeof *distributed_router); int n_distributed_router = 0; @@ -1705,7 +2622,7 @@ index 84440a47f..5dd44a220 100644 /* Group gw router since we do not have datapath dependency in * lflow generation for them. */ -@@ -10081,6 +10344,13 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, +@@ -10081,6 +10544,13 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, distributed_router[n_distributed_router++] = od; } @@ -1719,18 +2636,17 @@ index 84440a47f..5dd44a220 100644 if (sset_contains(&od->external_ips, lb_vip->vip_str)) { /* The load balancer vip is also present in the NAT entries. * So add a high priority lflow to advance the the packet -@@ -10113,10 +10383,26 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, +@@ -10113,10 +10583,26 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, "flags.force_snat_for_lb = 1; next;", lflows, prio, meter_groups); + /* LB affinity flows for datapaths where CMS has specified + * force_snat_for_lb floag option. + */ -+ build_lb_affinity_flows(lflows, lb, lb_vip, new_match, -+ "flags.force_snat_for_lb = 1", -+ lb_aff_force_snat_router, -+ n_lb_aff_force_snat_router, -+ true); ++ build_lb_affinity_lr_flows(lflows, lb, lb_vip, new_match, ++ "flags.force_snat_for_lb = 1; ", ++ lb_aff_force_snat_router, ++ n_lb_aff_force_snat_router); + build_gw_lrouter_nat_flows_for_lb(lb, gw_router, n_gw_router, reject, new_match, ds_cstr(action), est_match, @@ -1739,14 +2655,15 @@ index 84440a47f..5dd44a220 100644 + /* LB affinity flows for datapaths where CMS has specified + * skip_snat_for_lb floag option or regular datapaths. + */ -+ char *lb_aff_action = lb->skip_snat ? "flags.skip_snat_for_lb = 1" : NULL; -+ build_lb_affinity_flows(lflows, lb, lb_vip, new_match, lb_aff_action, -+ lb_aff_router, n_lb_aff_router, true); ++ char *lb_aff_action = ++ lb->skip_snat ? "flags.skip_snat_for_lb = 1; " : NULL; ++ build_lb_affinity_lr_flows(lflows, lb, lb_vip, new_match, lb_aff_action, ++ lb_aff_router, n_lb_aff_router); + /* Distributed router logic */ for (size_t i = 0; i < n_distributed_router; i++) { struct ovn_datapath *od = distributed_router[i]; -@@ -10210,6 +10496,8 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, +@@ -10210,6 +10696,8 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, free(gw_router_force_snat); free(gw_router_skip_snat); free(distributed_router); @@ -1755,7 +2672,7 @@ index 84440a47f..5dd44a220 100644 free(gw_router); } -@@ -13633,7 +13921,8 @@ static void +@@ -13633,7 +14121,8 @@ static void build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows, const struct hmap *ports, struct ds *match, struct ds *actions, @@ -1765,7 +2682,7 @@ index 84440a47f..5dd44a220 100644 { if (!od->nbr) { return; -@@ -13827,6 +14116,26 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows, +@@ -13827,6 +14316,26 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows, } } @@ -1792,7 +2709,7 @@ index 84440a47f..5dd44a220 100644 /* Handle force SNAT options set in the gateway router. */ if (od->is_gw_router) { if (dnat_force_snat_ip) { -@@ -13925,7 +14234,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, +@@ -13925,7 +14434,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows); build_lrouter_arp_nd_for_datapath(od, lsi->lflows, lsi->meter_groups); build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->ports, &lsi->match, @@ -1803,7 +2720,7 @@ index 84440a47f..5dd44a220 100644 } /* Helper function to combine all lflow generation which is iterated by port. -@@ -15148,6 +15459,11 @@ build_mcast_groups(struct lflow_input *input_data, +@@ -15148,6 +15659,11 @@ build_mcast_groups(struct lflow_input *input_data, hmap_init(mcast_groups); hmap_init(igmp_groups); @@ -1815,7 +2732,7 @@ index 84440a47f..5dd44a220 100644 HMAP_FOR_EACH (op, key_node, ports) { if (op->nbrp && lrport_is_enabled(op->nbrp)) { -@@ -15205,8 +15521,7 @@ build_mcast_groups(struct lflow_input *input_data, +@@ -15205,8 +15721,7 @@ build_mcast_groups(struct lflow_input *input_data, } /* If the datapath value is stale, purge the group. */ @@ -1825,7 +2742,7 @@ index 84440a47f..5dd44a220 100644 if (!od || ovn_datapath_is_stale(od)) { sbrec_igmp_group_delete(sb_igmp); -@@ -15251,7 +15566,6 @@ build_mcast_groups(struct lflow_input *input_data, +@@ -15251,7 +15766,6 @@ build_mcast_groups(struct lflow_input *input_data, * IGMP groups are based on the groups learnt by their multicast enabled * peers. */ @@ -1834,10 +2751,10 @@ index 84440a47f..5dd44a220 100644 if (ovs_list_is_empty(&od->mcast_info.groups)) { diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index dae961c87..450e67639 100644 +index dae961c87..509ca4821 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml -@@ -853,9 +853,55 @@ +@@ -853,9 +853,56 @@ @@ -1879,11 +2796,12 @@ index dae961c87..450e67639 100644 + is specified in options column, that includes a L4 port + PORT of protocol P and IP address VIP, + a priority-150 flow is added. For IPv4 VIPs, the flow -+ matches reg9[6] == 1 && ip && ip4.dst == -+ VIP && P.dst == PORT. -+ For IPv6 VIPs, the flow matches reg9[6] == 1 -+ && ip && ip6.dst == VIP && -+ P && P.dst == PORT. ++ matches reg9[6] == 1 && ct.new && ip && ++ ip4.dst == VIP && P.dst == PORT ++ . For IPv6 VIPs, the flow matches ++ reg9[6] == 1 && ct.new && ip && ++ ip6.dst == VIP && P && ++ P.dst == PORT. + The flow's action is ct_lb_mark(args), where + args contains comma separated IP addresses (and optional + port numbers) to load balance to. The address family of the IP @@ -1894,7 +2812,7 @@ index dae961c87..450e67639 100644
  • For all the configured load balancing rules for a switch in OVN_Northbound database that includes a L4 port -@@ -914,7 +960,38 @@ +@@ -914,7 +961,38 @@
  • @@ -1934,7 +2852,7 @@ index dae961c87..450e67639 100644

    Logical flows in this table closely reproduce those in the -@@ -976,7 +1053,7 @@ +@@ -976,7 +1054,7 @@ @@ -1943,7 +2861,7 @@ index dae961c87..450e67639 100644

    • -@@ -999,7 +1076,7 @@ +@@ -999,7 +1077,7 @@
    @@ -1952,7 +2870,7 @@ index dae961c87..450e67639 100644
    • If the logical switch has load balancer(s) configured, then a -@@ -1017,7 +1094,7 @@ +@@ -1017,7 +1095,7 @@
    @@ -1961,7 +2879,7 @@ index dae961c87..450e67639 100644
    • If the logical switch has load balancer(s) configured, then a -@@ -1052,7 +1129,7 @@ +@@ -1052,7 +1130,7 @@
    @@ -1970,7 +2888,7 @@ index dae961c87..450e67639 100644
    • -@@ -1086,7 +1163,7 @@ +@@ -1086,7 +1164,7 @@

    @@ -1979,7 +2897,7 @@ index dae961c87..450e67639 100644

    This table implements ARP/ND responder in a logical switch for known -@@ -1388,7 +1465,7 @@ output; +@@ -1388,7 +1466,7 @@ output; @@ -1988,7 +2906,7 @@ index dae961c87..450e67639 100644

    This table adds the DHCPv4 options to a DHCPv4 packet from the -@@ -1449,7 +1526,7 @@ next; +@@ -1449,7 +1527,7 @@ next; @@ -1997,7 +2915,7 @@ index dae961c87..450e67639 100644

    This table implements DHCP responder for the DHCP replies generated by -@@ -1530,7 +1607,7 @@ output; +@@ -1530,7 +1608,7 @@ output; @@ -2006,7 +2924,7 @@ index dae961c87..450e67639 100644

    This table looks up and resolves the DNS names to the corresponding -@@ -1559,7 +1636,7 @@ reg0[4] = dns_lookup(); next; +@@ -1559,7 +1637,7 @@ reg0[4] = dns_lookup(); next; @@ -2015,7 +2933,7 @@ index dae961c87..450e67639 100644

    This table implements DNS responder for the DNS replies generated by -@@ -1594,7 +1671,7 @@ output; +@@ -1594,7 +1672,7 @@ output; @@ -2024,7 +2942,7 @@ index dae961c87..450e67639 100644

    Traffic from the external logical ports enter the ingress -@@ -1637,7 +1714,7 @@ output; +@@ -1637,7 +1715,7 @@ output; @@ -2033,7 +2951,7 @@ index dae961c87..450e67639 100644

    This table implements switching behavior. It contains these logical -@@ -1806,7 +1883,7 @@ output; +@@ -1806,7 +1884,7 @@ output; @@ -2042,7 +2960,7 @@ index dae961c87..450e67639 100644

    This table handles the packets whose destination was not found or -@@ -3172,7 +3249,33 @@ icmp6 { +@@ -3172,7 +3250,33 @@ icmp6 { packet de-fragmentation and tracking before sending it to the next table.

    @@ -2077,7 +2995,7 @@ index dae961c87..450e67639 100644

    Packets enter the pipeline with destination IP address that needs to -@@ -3180,7 +3283,7 @@ icmp6 { +@@ -3180,7 +3284,7 @@ icmp6 { in the reverse direction needs to be unDNATed.

    @@ -2086,7 +3004,7 @@ index dae961c87..450e67639 100644

    Following load balancing DNAT flows are added for Gateway router or -@@ -3190,6 +3293,21 @@ icmp6 { +@@ -3190,6 +3294,21 @@ icmp6 {

      @@ -2095,8 +3013,8 @@ index dae961c87..450e67639 100644 + a positive affinity timeout is specified in options + column, that includes a L4 port PORT of protocol + P and IPv4 or IPv6 address VIP, a priority-150 -+ flow that matches on reg9[6] == 1 && ip && -+ reg0 == VIP && P && ++ flow that matches on reg9[6] == 1 && ct.new && ++ ip && reg0 == VIP && P && + reg9[16..31] == PORT (xxreg0 + == VIP in the IPv6 case) with an action of + ct_lb_mark(args) , where args @@ -2108,7 +3026,7 @@ index dae961c87..450e67639 100644
    • If controller_event has been enabled for all the configured load balancing rules for a Gateway router or Router with gateway port -@@ -3319,7 +3437,7 @@ icmp6 { +@@ -3319,7 +3438,7 @@ icmp6 {
    @@ -2117,7 +3035,7 @@ index dae961c87..450e67639 100644
    • -@@ -3361,7 +3479,7 @@ icmp6 { +@@ -3361,7 +3480,7 @@ icmp6 {
    @@ -2126,7 +3044,7 @@ index dae961c87..450e67639 100644

    On distributed routers, the DNAT table only handles packets -@@ -3416,7 +3534,35 @@ icmp6 { +@@ -3416,7 +3535,35 @@ icmp6 { @@ -2163,7 +3081,7 @@ index dae961c87..450e67639 100644

    • If ECMP routes with symmetric reply are configured in the -@@ -3435,7 +3581,7 @@ icmp6 { +@@ -3435,7 +3582,7 @@ icmp6 {
    @@ -2172,7 +3090,7 @@ index dae961c87..450e67639 100644
    • -@@ -3465,7 +3611,7 @@ reg0[5] = put_nd_ra_opts(options);next; +@@ -3465,7 +3612,7 @@ reg0[5] = put_nd_ra_opts(options);next;
    @@ -2181,7 +3099,7 @@ index dae961c87..450e67639 100644

    This table implements IPv6 ND RA responder for the IPv6 ND RA replies -@@ -3510,7 +3656,7 @@ output; +@@ -3510,7 +3657,7 @@ output; @@ -2190,7 +3108,7 @@ index dae961c87..450e67639 100644

    If a packet arrived at this table from Logical Router Port P -@@ -3540,7 +3686,7 @@ output; +@@ -3540,7 +3687,7 @@ output; @@ -2199,7 +3117,7 @@ index dae961c87..450e67639 100644

    A packet that arrives at this table is an IP packet that should be -@@ -3741,7 +3887,7 @@ select(reg8[16..31], MID1, MID2, ...); +@@ -3741,7 +3888,7 @@ select(reg8[16..31], MID1, MID2, ...); @@ -2208,7 +3126,7 @@ index dae961c87..450e67639 100644

    This table implements the second part of IP routing for ECMP routes -@@ -3793,7 +3939,7 @@ outport = P; +@@ -3793,7 +3940,7 @@ outport = P; @@ -2217,7 +3135,7 @@ index dae961c87..450e67639 100644

    This table adds flows for the logical router policies configured on the logical router. Please see the -@@ -3865,7 +4011,7 @@ next; +@@ -3865,7 +4012,7 @@ next; @@ -2226,7 +3144,7 @@ index dae961c87..450e67639 100644

    This table handles the ECMP for the router policies configured with multiple nexthops. -@@ -3909,7 +4055,7 @@ outport = P +@@ -3909,7 +4056,7 @@ outport = P @@ -2235,7 +3153,7 @@ index dae961c87..450e67639 100644

    Any packet that reaches this table is an IP packet whose next-hop -@@ -4110,7 +4256,7 @@ outport = P +@@ -4110,7 +4257,7 @@ outport = P @@ -2244,7 +3162,7 @@ index dae961c87..450e67639 100644

    For distributed logical routers or gateway routers with gateway -@@ -4147,7 +4293,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; +@@ -4147,7 +4294,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; and advances to the next table.

    @@ -2253,7 +3171,7 @@ index dae961c87..450e67639 100644

    For distributed logical routers or gateway routers with gateway port -@@ -4210,7 +4356,7 @@ icmp6 { +@@ -4210,7 +4357,7 @@ icmp6 { and advances to the next table.

    @@ -2262,7 +3180,7 @@ index dae961c87..450e67639 100644

    For distributed logical routers where one or more of the logical router -@@ -4278,7 +4424,7 @@ icmp6 { +@@ -4278,7 +4425,7 @@ icmp6 { @@ -2271,7 +3189,7 @@ index dae961c87..450e67639 100644

    In the common case where the Ethernet destination has been resolved, this -@@ -4392,6 +4538,22 @@ nd_ns { +@@ -4392,6 +4539,22 @@ nd_ns { @@ -2294,6 +3212,36 @@ index dae961c87..450e67639 100644

    Egress Table 1: UNDNAT

    +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 96f17f15f..4bf1afe3b 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -125,6 +125,10 @@ static const char *rbac_igmp_group_auth[] = + {""}; + static const char *rbac_igmp_group_update[] = + {"address", "chassis", "datapath", "ports"}; ++static const char *rbac_bfd_auth[] = ++ {""}; ++static const char *rbac_bfd_update[] = ++ {"status"}; + + static struct rbac_perm_cfg { + const char *table; +@@ -207,6 +211,14 @@ static struct rbac_perm_cfg { + .update = rbac_igmp_group_update, + .n_update = ARRAY_SIZE(rbac_igmp_group_update), + .row = NULL ++ },{ ++ .table = "BFD", ++ .auth = rbac_bfd_auth, ++ .n_auth = ARRAY_SIZE(rbac_bfd_auth), ++ .insdel = false, ++ .update = rbac_bfd_update, ++ .n_update = ARRAY_SIZE(rbac_bfd_update), ++ .row = NULL + },{ + .table = NULL, + .auth = NULL, diff --git a/ovn-nb.xml b/ovn-nb.xml index 7fe88af27..dee9d4c15 100644 --- a/ovn-nb.xml @@ -2500,8 +3448,183 @@ index 3c3fb31c7..6a0e83c33 100644 + +OVN_CLEANUP([hv1]) +AT_CLEANUP +diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at +index b136472c8..c2e26a4be 100644 +--- a/tests/ovn-ic.at ++++ b/tests/ovn-ic.at +@@ -119,6 +119,139 @@ OVN_CLEANUP_IC + AT_CLEANUP + ]) + ++OVN_FOR_EACH_NORTHD([ ++AT_SETUP([ovn-ic -- route deletion upon TS deletion]) ++ ++ovn_init_ic_db ++net_add n1 ++ ++# 1 GW per AZ ++for i in 1 2; do ++ az=az$i ++ ovn_start $az ++ sim_add gw-$az ++ as gw-$az ++ check ovs-vsctl add-br br-phys ++ ovn_az_attach $az n1 br-phys 192.168.1.$i ++ check ovs-vsctl set open . external-ids:ovn-is-interconn=true ++ check ovn-nbctl set nb-global . \ ++ options:ic-route-adv=true \ ++ options:ic-route-adv-default=true \ ++ options:ic-route-learn=true \ ++ options:ic-route-learn-default=true ++done ++ ++create_ic_infra() { ++ az_id=$1 ++ ts_id=$2 ++ az=az$i ++ ++ lsp=lsp${az_id}-${ts_id} ++ lrp=lrp${az_id}-${ts_id} ++ ts=ts${az_id}-${ts_id} ++ lr=lr${az_id}-${ts_id} ++ ++ ovn_as $az ++ ++ check ovn-ic-nbctl ts-add $ts ++ check ovn-nbctl lr-add $lr ++ check ovn-nbctl lrp-add $lr $lrp 00:00:00:00:00:0$az_id 10.0.$az_id.1/24 ++ check ovn-nbctl lrp-set-gateway-chassis $lrp gw-$az ++ ++ check ovn-nbctl lsp-add $ts $lsp -- \ ++ lsp-set-addresses $lsp router -- \ ++ lsp-set-type $lsp router -- \ ++ lsp-set-options $lsp router-port=$lrp ++ ++ check ovn-nbctl lr-route-add $lr 192.168.0.0/16 10.0.$az_id.10 ++} ++ ++create_ic_infra 1 1 ++create_ic_infra 1 2 ++create_ic_infra 2 1 ++ ++ovn_as az1 ++ ++wait_row_count ic-sb:Route 3 ip_prefix=192.168.0.0/16 ++ ++# remove transit switch 1 (from az1) and check if its route is deleted ++# same route from another AZ and ts should remain, as ++check ovn-ic-nbctl ts-del ts1-1 ++sleep 2 ++ovn-ic-sbctl list route ++ovn-ic-nbctl list transit_switch ++wait_row_count ic-sb:route 2 ip_prefix=192.168.0.0/16 ++ovn-ic-sbctl list route ++ ++for i in 1 2; do ++ az=az$i ++ OVN_CLEANUP_SBOX(gw-$az) ++ OVN_CLEANUP_AZ([$az]) ++done ++OVN_CLEANUP_IC ++AT_CLEANUP ++]) ++ ++OVN_FOR_EACH_NORTHD([ ++AT_SETUP([ovn-ic -- duplicate NB route adv/learn]) ++ ++ovn_init_ic_db ++net_add n1 ++ ++# 1 GW per AZ ++for i in 1 2; do ++ az=az$i ++ ovn_start $az ++ sim_add gw-$az ++ as gw-$az ++ check ovs-vsctl add-br br-phys ++ ovn_az_attach $az n1 br-phys 192.168.1.$i ++ check ovs-vsctl set open . external-ids:ovn-is-interconn=true ++ check ovn-nbctl set nb-global . \ ++ options:ic-route-adv=true \ ++ options:ic-route-adv-default=true \ ++ options:ic-route-learn=true \ ++ options:ic-route-learn-default=true ++done ++ ++ovn_as az1 ++ ++# create transit switch and connect to LR ++check ovn-ic-nbctl ts-add ts1 ++for i in 1 2; do ++ ovn_as az$i ++ ++ check ovn-nbctl lr-add lr1 ++ check ovn-nbctl lrp-add lr1 lrp$i 00:00:00:00:0$i:01 10.0.$i.1/24 ++ check ovn-nbctl lrp-set-gateway-chassis lrp$i gw-az$i ++ ++ check ovn-nbctl lsp-add ts1 lsp$i -- \ ++ lsp-set-addresses lsp$i router -- \ ++ lsp-set-type lsp$i router -- \ ++ lsp-set-options lsp$i router-port=lrp$i ++done ++ ++ovn_as az1 ++ ++ovn-nbctl \ ++ --id=@id create logical-router-static-route ip_prefix=1.1.1.1/32 nexthop=10.0.1.10 -- \ ++ add logical-router lr1 static_routes @id ++ovn-nbctl \ ++ --id=@id create logical-router-static-route ip_prefix=1.1.1.1/32 nexthop=10.0.1.10 -- \ ++ add logical-router lr1 static_routes @id ++ ++wait_row_count ic-sb:route 1 ip_prefix=1.1.1.1/32 ++ ++for i in 1 2; do ++ az=az$i ++ OVN_CLEANUP_SBOX(gw-$az) ++ OVN_CLEANUP_AZ([$az]) ++done ++ ++OVN_CLEANUP_IC ++AT_CLEANUP ++]) ++ + OVN_FOR_EACH_NORTHD([ + AT_SETUP([ovn-ic -- gateway sync]) + +diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at +index 726efa6f4..0d3412742 100644 +--- a/tests/ovn-nbctl.at ++++ b/tests/ovn-nbctl.at +@@ -1623,6 +1623,7 @@ AT_CHECK([ovn-nbctl lr-route-add lr0 0.0.0.0/0 192.168.0.1]) + AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.1.0/24 11.0.1.1 lp0]) + AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.0.1/24 11.0.0.2]) + AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.10.0/24 lp0]) ++AT_CHECK([ovn-nbctl --bfd lr-route-add lr0 10.0.20.0/24 11.0.2.1 lp0]) + AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.10.0/24 lp1], [1], [], + [ovn-nbctl: bad IPv4 nexthop argument: lp1 + ]) +@@ -1676,6 +1677,7 @@ Route Table

    : + 10.0.0.0/24 11.0.0.1 dst-ip + 10.0.1.0/24 11.0.1.1 dst-ip lp0 + 10.0.10.0/24 dst-ip lp0 ++ 10.0.20.0/24 11.0.2.1 dst-ip lp0 bfd + 20.0.0.0/24 discard dst-ip + 9.16.1.0/24 11.0.0.1 src-ip + 10.0.0.0/24 11.0.0.2 src-ip +@@ -1683,6 +1685,10 @@ Route Table
    : + 0.0.0.0/0 192.168.0.1 dst-ip + ]) + ++check_row_count nb:BFD 1 ++AT_CHECK([ovn-nbctl lr-route-del lr0 10.0.20.0/24]) ++check_row_count nb:BFD 0 ++ + AT_CHECK([ovn-nbctl lrp-add lr0 lp1 f0:00:00:00:00:02 11.0.0.254/24]) + AT_CHECK([ovn-nbctl --may-exist lr-route-add lr0 10.0.0.111/24 11.0.0.1 lp1]) + AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index 7c3c84007..c7d766c2d 100644 +index 7c3c84007..c00831432 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2149,9 +2149,9 @@ AT_CAPTURE_FILE([sw1flows]) @@ -3019,7 +4142,7 @@ index 7c3c84007..c7d766c2d 100644 ]) ovn-sbctl get datapath S0 _uuid > dp_uuids -@@ -7841,14 +7846,92 @@ ovn-sbctl dump-flows S1 > S1flows +@@ -7841,14 +7846,137 @@ ovn-sbctl dump-flows S1 > S1flows AT_CAPTURE_FILE([S0flows]) AT_CAPTURE_FILE([S1flows]) @@ -3070,7 +4193,7 @@ index 7c3c84007..c7d766c2d 100644 +ovn-sbctl dump-flows R1 > R1flows + +AT_CAPTURE_FILE([S0flows]) -+AT_CAPTURE_FILE([S1flows]) ++AT_CAPTURE_FILE([R1flows]) + +AT_CHECK([grep "ls_in_lb_aff_check" S0flows | sort], [0], [dnl + table=11(ls_in_lb_aff_check ), priority=0 , match=(1), action=(next;) @@ -3088,30 +4211,75 @@ index 7c3c84007..c7d766c2d 100644 + +ovn-nbctl --wait=sb set load_balancer lb0 options:affinity_timeout=60 + ++AS_BOX([Test LS flows]) +ovn-sbctl dump-flows S0 > S0flows -+ovn-sbctl dump-flows R1 > R1flows -+ +AT_CAPTURE_FILE([S0flows]) -+AT_CAPTURE_FILE([S1flows]) + +AT_CHECK([grep "ls_in_lb_aff_check" S0flows | sort], [0], [dnl + table=11(ls_in_lb_aff_check ), priority=0 , match=(1), action=(next;) + table=11(ls_in_lb_aff_check ), priority=100 , match=(ct.new && ip4 && reg1 == 172.16.0.10 && reg2[[0..15]] == 80), action=(reg9[[6]] = chk_lb_aff(); next;) +]) ++AT_CHECK([grep "ls_in_lb " S0flows | sort], [0], [dnl ++ table=12(ls_in_lb ), priority=0 , match=(1), action=(next;) ++ table=12(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 172.16.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=10.0.0.2:80,20.0.0.2:80);) ++ table=12(ls_in_lb ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 10.0.0.2 && reg8[[0..15]] == 80), action=(reg0[[1]] = 0; reg1 = 172.16.0.10; reg2[[0..15]] = 80; ct_lb_mark(backends=10.0.0.2:80);) ++ table=12(ls_in_lb ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 20.0.0.2 && reg8[[0..15]] == 80), action=(reg0[[1]] = 0; reg1 = 172.16.0.10; reg2[[0..15]] = 80; ct_lb_mark(backends=20.0.0.2:80);) ++]) +AT_CHECK([grep "ls_in_lb_aff_learn" S0flows | sort], [0], [dnl + table=13(ls_in_lb_aff_learn ), priority=0 , match=(1), action=(next;) -+ table=13(ls_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && ip4.dst == 10.0.0.2 && reg1 == 172.16.0.10 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "10.0.0.2:80", proto = tcp, timeout = 60); /* drop */) -+ table=13(ls_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && ip4.dst == 20.0.0.2 && reg1 == 172.16.0.10 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "20.0.0.2:80", proto = tcp, timeout = 60); /* drop */) ++ table=13(ls_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && reg1 == 172.16.0.10 && reg2[[0..15]] == 80 && ip4.dst == 10.0.0.2 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "10.0.0.2:80", proto = tcp, timeout = 60); /* drop */) ++ table=13(ls_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && reg1 == 172.16.0.10 && reg2[[0..15]] == 80 && ip4.dst == 20.0.0.2 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "20.0.0.2:80", proto = tcp, timeout = 60); /* drop */) +]) + ++AS_BOX([Test LR flows]) ++ovn-sbctl dump-flows R1 > R1flows ++AT_CAPTURE_FILE([R1flows]) ++ +AT_CHECK([grep "lr_in_lb_aff_check" R1flows | sort], [0], [dnl + table=6 (lr_in_lb_aff_check ), priority=0 , match=(1), action=(next;) + table=6 (lr_in_lb_aff_check ), priority=100 , match=(ct.new && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80), action=(reg9[[6]] = chk_lb_aff(); next;) +]) ++AT_CHECK([grep "lr_in_dnat " R1flows | sort], [0], [dnl ++ table=7 (lr_in_dnat ), priority=0 , match=(1), action=(next;) ++ table=7 (lr_in_dnat ), priority=120 , match=(ct.est && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80 && ct_mark.natted == 1), action=(next;) ++ table=7 (lr_in_dnat ), priority=120 , match=(ct.new && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80), action=(ct_lb_mark(backends=10.0.0.2:80,20.0.0.2:80);) ++ table=7 (lr_in_dnat ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 10.0.0.2 && reg8[[0..15]] == 80), action=(reg0 = 172.16.0.10; ct_lb_mark(backends=10.0.0.2:80);) ++ table=7 (lr_in_dnat ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 20.0.0.2 && reg8[[0..15]] == 80), action=(reg0 = 172.16.0.10; ct_lb_mark(backends=20.0.0.2:80);) ++]) +AT_CHECK([grep "lr_in_lb_aff_learn" R1flows | sort], [0], [dnl + table=8 (lr_in_lb_aff_learn ), priority=0 , match=(1), action=(next;) -+ table=8 (lr_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && ip4.dst == 10.0.0.2 && reg0 == 172.16.0.10 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "10.0.0.2:80", proto = tcp, timeout = 60); /* drop */) -+ table=8 (lr_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && ip4.dst == 20.0.0.2 && reg0 == 172.16.0.10 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "20.0.0.2:80", proto = tcp, timeout = 60); /* drop */) ++ table=8 (lr_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && reg0 == 172.16.0.10 && reg9[[16..31]] == 80 && ip4.dst == 10.0.0.2 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "10.0.0.2:80", proto = tcp, timeout = 60); /* drop */) ++ table=8 (lr_in_lb_aff_learn ), priority=100 , match=(reg9[[6]] == 0 && ct.new && ip4 && reg0 == 172.16.0.10 && reg9[[16..31]] == 80 && ip4.dst == 20.0.0.2 && tcp.dst == 80), action=(commit_lb_aff(vip = "172.16.0.10:80", backend = "20.0.0.2:80", proto = tcp, timeout = 60); /* drop */) ++]) ++ ++AS_BOX([Test LR flows - skip_snat=true]) ++check ovn-nbctl --wait=sb set load_balancer lb0 options:skip_snat=true ++ ++ovn-sbctl dump-flows R1 > R1flows_skip_snat ++AT_CAPTURE_FILE([R1flows_skip_snat]) ++ ++AT_CHECK([grep "lr_in_dnat " R1flows_skip_snat | sort], [0], [dnl ++ table=7 (lr_in_dnat ), priority=0 , match=(1), action=(next;) ++ table=7 (lr_in_dnat ), priority=120 , match=(ct.est && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80 && ct_mark.natted == 1), action=(flags.skip_snat_for_lb = 1; next;) ++ table=7 (lr_in_dnat ), priority=120 , match=(ct.new && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80), action=(flags.skip_snat_for_lb = 1; ct_lb_mark(backends=10.0.0.2:80,20.0.0.2:80);) ++ table=7 (lr_in_dnat ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 10.0.0.2 && reg8[[0..15]] == 80), action=(reg0 = 172.16.0.10; flags.skip_snat_for_lb = 1; ct_lb_mark(backends=10.0.0.2:80);) ++ table=7 (lr_in_dnat ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 20.0.0.2 && reg8[[0..15]] == 80), action=(reg0 = 172.16.0.10; flags.skip_snat_for_lb = 1; ct_lb_mark(backends=20.0.0.2:80);) ++]) ++ ++check ovn-nbctl remove load_balancer lb0 options skip_snat ++ ++AS_BOX([Test LR flows - lb_force_snat_ip="172.16.0.1"]) ++check ovn-nbctl --wait=sb set logical_router R1 options:lb_force_snat_ip="172.16.0.1" ++ ++ovn-sbctl dump-flows R1 > R1flows_force_snat ++AT_CAPTURE_FILE([R1flows_force_snat]) ++ ++AT_CHECK([grep "lr_in_dnat " R1flows_force_snat | sort], [0], [dnl ++ table=7 (lr_in_dnat ), priority=0 , match=(1), action=(next;) ++ table=7 (lr_in_dnat ), priority=120 , match=(ct.est && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80 && ct_mark.natted == 1), action=(flags.force_snat_for_lb = 1; next;) ++ table=7 (lr_in_dnat ), priority=120 , match=(ct.new && ip4 && reg0 == 172.16.0.10 && tcp && reg9[[16..31]] == 80), action=(flags.force_snat_for_lb = 1; ct_lb_mark(backends=10.0.0.2:80,20.0.0.2:80);) ++ table=7 (lr_in_dnat ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 10.0.0.2 && reg8[[0..15]] == 80), action=(reg0 = 172.16.0.10; flags.force_snat_for_lb = 1; ct_lb_mark(backends=10.0.0.2:80);) ++ table=7 (lr_in_dnat ), priority=150 , match=(reg9[[6]] == 1 && ct.new && ip4 && reg4 == 20.0.0.2 && reg8[[0..15]] == 80), action=(reg0 = 172.16.0.10; flags.force_snat_for_lb = 1; ct_lb_mark(backends=20.0.0.2:80);) +]) + +AT_CLEANUP @@ -3801,7 +4969,7 @@ index 616a87fcf..8e6cb415c 100644 +/.*terminating with signal 15.*/d"]) +])) diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 8acfb3e39..cb3412717 100644 +index 8acfb3e39..161c2823e 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -5272,158 +5272,22 @@ AT_CLEANUP @@ -3986,7 +5154,7 @@ index 8acfb3e39..cb3412717 100644 AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options qos_burst=3000000]) OVS_WAIT_UNTIL([test "$(tc qdisc show | grep 'htb 1: dev ovs-public')" = ""]) -@@ -8343,3 +8211,930 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +@@ -8343,3 +8211,985 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d AT_CLEANUP ]) @@ -4362,8 +5530,12 @@ index 8acfb3e39..cb3412717 100644 + +ovn-nbctl lb-add lb0 172.16.1.100:8080 192.168.1.2:80,192.168.2.2:80 +ovn-nbctl lb-add lb10 172.16.1.110:8080 192.168.1.2:80,192.168.2.2:80 ++ovn-nbctl lb-add lb0-no-aff 172.16.1.100:8081 192.168.1.2:80,192.168.2.2:80 ++ovn-nbctl lb-add lb10-no-aff 172.16.1.110:8081 192.168.1.2:80,192.168.2.2:80 +ovn-nbctl lr-lb-add R2 lb0 +ovn-nbctl lr-lb-add R2 lb10 ++ovn-nbctl lr-lb-add R2 lb0-no-aff ++ovn-nbctl lr-lb-add R2 lb10-no-aff + +# Start webservers in 'foo1', 'bar1'. +NETNS_DAEMONIZE([foo1], [nc -l -k 192.168.1.2 80], [nc-foo1.pid]) @@ -4409,23 +5581,28 @@ index 8acfb3e39..cb3412717 100644 +]) + +check_affinity_flows () { -+n1=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ip,reg4=0xc0a80102/{print substr($4,11,length($4)-11)}') -+n2=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ip,reg4=0xc0a80202/{print substr($4,11,length($4)-11)}') ++n1=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ct_state=\+new\+trk,ip,reg4=0xc0a80102/{print substr($4,11,length($4)-11)}') ++n2=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ct_state=\+new\+trk,ip,reg4=0xc0a80202/{print substr($4,11,length($4)-11)}') +[[ $n1 -gt 0 -a $n2 -eq 0 ]] || [[ $n1 -eq 0 -a $n2 -gt 0 ]] +echo $? +} +AT_CHECK([test $(check_affinity_flows) -eq 0]) ++NS_CHECK_EXEC([alice1], [nc -z 172.16.1.100 8081]) + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +ovn-nbctl lb-add lb1 172.16.1.101:8080 192.168.1.2:80,192.168.2.2:80 +ovn-nbctl lb-add lb11 172.16.1.111:8080 192.168.1.2:80,192.168.2.2:80 ++ovn-nbctl lb-add lb1-no-aff 172.16.1.101:8081 192.168.1.2:80,192.168.2.2:80 ++ovn-nbctl lb-add lb11-no-aff 172.16.1.111:8081 192.168.1.2:80,192.168.2.2:80 +# Enable lb affinity +ovn-nbctl --wait=sb set load_balancer lb1 options:affinity_timeout=3 -+ovn-nbctl --wait=sb set load_balancer lb1 options:affinity_timeout=3 ++ovn-nbctl --wait=sb set load_balancer lb11 options:affinity_timeout=3 +ovn-nbctl lr-lb-add R2 lb1 +ovn-nbctl lr-lb-add R2 lb11 ++ovn-nbctl lr-lb-add R2 lb1-no-aff ++ovn-nbctl lr-lb-add R2 lb11-no-aff + +# check we use both backends +for i in $(seq 1 15); do @@ -4440,6 +5617,7 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=172.16.1.2,dst=172.16.1.101,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +tcp,orig=(src=172.16.1.2,dst=172.16.1.101,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([alice1], [nc -z 172.16.1.101 8081]) + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) @@ -4448,10 +5626,14 @@ index 8acfb3e39..cb3412717 100644 + +ovn-nbctl lb-add lb2 192.168.2.100:8080 192.168.2.2:80,192.168.2.3:80 +ovn-nbctl lb-add lb20 192.168.2.120:8080 192.168.2.2:80,192.168.2.3:80 ++ovn-nbctl lb-add lb2-no-aff 192.168.2.100:8081 192.168.2.2:80,192.168.2.3:80 ++ovn-nbctl lb-add lb20-no-aff 192.168.2.120:8081 192.168.2.2:80,192.168.2.3:80 +ovn-nbctl --wait=sb set load_balancer lb2 options:affinity_timeout=60 +ovn-nbctl --wait=sb set load_balancer lb20 options:affinity_timeout=60 +ovn-nbctl ls-lb-add foo lb2 +ovn-nbctl ls-lb-add foo lb20 ++ovn-nbctl ls-lb-add foo lb2-no-aff ++ovn-nbctl ls-lb-add foo lb20-no-aff + +for i in $(seq 1 15); do + echo Request $i @@ -4462,16 +5644,21 @@ index 8acfb3e39..cb3412717 100644 +sed -e 's/zone=[[0-9]]*/zone=/; s/src=192.168.2.[[0-9]]/src=192.168.2./'], [0], [dnl +tcp,orig=(src=192.168.1.2,dst=192.168.2.100,sport=,dport=),reply=(src=192.168.2.,dst=192.168.1.2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([foo1], [nc -z 192.168.2.100 8081]) + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +ovn-nbctl lb-add lb3 192.168.2.101:8080 192.168.2.2:80,192.168.2.3:80 +ovn-nbctl lb-add lb30 192.168.2.131:8080 192.168.2.2:80,192.168.2.3:80 ++ovn-nbctl lb-add lb3-no-aff 192.168.2.101:8081 192.168.2.2:80,192.168.2.3:80 ++ovn-nbctl lb-add lb30-no-aff 192.168.2.131:8081 192.168.2.2:80,192.168.2.3:80 +ovn-nbctl --wait=sb set load_balancer lb3 options:affinity_timeout=3 +ovn-nbctl --wait=sb set load_balancer lb30 options:affinity_timeout=3 +ovn-nbctl ls-lb-add foo lb3 +ovn-nbctl ls-lb-add foo lb30 ++ovn-nbctl ls-lb-add foo lb3-no-aff ++ovn-nbctl ls-lb-add foo lb30-no-aff +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + @@ -4487,16 +5674,23 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=192.168.1.2,dst=192.168.2.101,sport=,dport=),reply=(src=192.168.2.2,dst=192.168.1.2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +tcp,orig=(src=192.168.1.2,dst=192.168.2.101,sport=,dport=),reply=(src=192.168.2.3,dst=192.168.1.2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([foo1], [nc -z 192.168.2.101 8081]) + +NS_CHECK_EXEC([foo1], [ip neigh add 192.168.1.200 lladdr 00:00:01:01:02:03 dev foo1], [0]) +ovn-nbctl lb-add lb4 192.168.1.100:8080 192.168.1.2:80 +ovn-nbctl lb-add lb40 192.168.1.140:8080 192.168.1.2:80 ++ovn-nbctl lb-add lb4-no-aff 192.168.1.100:8081 192.168.1.2:80 ++ovn-nbctl lb-add lb40-no-aff 192.168.1.140:8081 192.168.1.2:80 +ovn-nbctl --wait=sb set load_balancer lb4 options:affinity_timeout=60 options:hairpin_snat_ip=192.168.1.200 +ovn-nbctl --wait=sb set load_balancer lb40 options:affinity_timeout=60 options:hairpin_snat_ip=192.168.1.200 +ovn-nbctl ls-lb-add foo lb4 +ovn-nbctl ls-lb-add foo lb40 +ovn-nbctl lr-lb-add R1 lb4 +ovn-nbctl lr-lb-add R1 lb40 ++ovn-nbctl ls-lb-add foo lb4-no-aff ++ovn-nbctl ls-lb-add foo lb40-no-aff ++ovn-nbctl lr-lb-add R1 lb4-no-aff ++ovn-nbctl lr-lb-add R1 lb40-no-aff + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) @@ -4513,6 +5707,7 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=192.168.1.2,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=192.168.1.200,sport=,dport=),zone=,protoinfo=(state=) +tcp,orig=(src=192.168.1.200,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=192.168.1.200,sport=,dport=),zone=,protoinfo=(state=) +]) ++NS_CHECK_EXEC([foo1], [nc -z 192.168.1.100 8081]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + @@ -4634,8 +5829,12 @@ index 8acfb3e39..cb3412717 100644 + +ovn-nbctl lb-add lb0 [[fd30::1]]:8080 [[fd11::2]]:80,[[fd12::2]]:80 +ovn-nbctl lb-add lb10 [[fd30::10]]:8080 [[fd11::2]]:80,[[fd12::2]]:80 ++ovn-nbctl lb-add lb0-no-aff [[fd30::1]]:8081 [[fd11::2]]:80,[[fd12::2]]:80 ++ovn-nbctl lb-add lb10-no-aff [[fd30::10]]:8081 [[fd11::2]]:80,[[fd12::2]]:80 +ovn-nbctl lr-lb-add R2 lb0 +ovn-nbctl lr-lb-add R2 lb10 ++ovn-nbctl lr-lb-add R2 lb0-no-aff ++ovn-nbctl lr-lb-add R2 lb10-no-aff + +# Wait for ovn-controller to catch up. +ovn-nbctl --wait=hv sync @@ -4658,6 +5857,7 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=fd72::2,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +tcp,orig=(src=fd72::2,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([alice1], [nc -z fd30::1 8081]) + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) @@ -4681,23 +5881,28 @@ index 8acfb3e39..cb3412717 100644 +]) + +check_affinity_flows () { -+n1=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ipv6,reg4=0xfd110000/{print substr($4,11,length($4)-11)}') -+n2=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ipv6,reg4=0xfd120000/{print substr($4,11,length($4)-11)}') ++n1=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ct_state=\+new\+trk,ipv6,reg4=0xfd110000/{print substr($4,11,length($4)-11)}') ++n2=$(ovs-ofctl dump-flows br-int table=15 |awk '/priority=150,ct_state=\+new\+trk,ipv6,reg4=0xfd120000/{print substr($4,11,length($4)-11)}') +[[ $n1 -gt 0 -a $n2 -eq 0 ]] || [[ $n1 -eq 0 -a $n2 -gt 0 ]] +echo $? +} +AT_CHECK([test $(check_affinity_flows) -eq 0]) ++NS_CHECK_EXEC([alice1], [nc -z fd30::1 8081]) + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +ovn-nbctl lb-add lb1 [[fd30::2]]:8080 [[fd11::2]]:80,[[fd12::2]]:80 +ovn-nbctl lb-add lb11 [[fd30::12]]:8080 [[fd11::2]]:80,[[fd12::2]]:80 ++ovn-nbctl lb-add lb1-no-aff [[fd30::2]]:8081 [[fd11::2]]:80,[[fd12::2]]:80 ++ovn-nbctl lb-add lb11-no-aff [[fd30::12]]:8081 [[fd11::2]]:80,[[fd12::2]]:80 +# Enable lb affinity +ovn-nbctl --wait=sb set load_balancer lb1 options:affinity_timeout=3 +ovn-nbctl --wait=sb set load_balancer lb11 options:affinity_timeout=3 +ovn-nbctl lr-lb-add R2 lb1 +ovn-nbctl lr-lb-add R2 lb11 ++ovn-nbctl lr-lb-add R2 lb1-no-aff ++ovn-nbctl lr-lb-add R2 lb11-no-aff + +# check we use both backends +for i in $(seq 1 15); do @@ -4712,6 +5917,7 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=fd72::2,dst=fd30::2,sport=,dport=),reply=(src=fd11::2,dst=fd72::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +tcp,orig=(src=fd72::2,dst=fd30::2,sport=,dport=),reply=(src=fd12::2,dst=fd72::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([alice1], [nc -z fd30::2 8081]) + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) @@ -4720,10 +5926,14 @@ index 8acfb3e39..cb3412717 100644 + +ovn-nbctl lb-add lb2 [[fd12::a]]:8080 [[fd12::2]]:80,[[fd12::3]]:80 +ovn-nbctl lb-add lb20 [[fd12::2a]]:8080 [[fd12::2]]:80,[[fd12::3]]:80 ++ovn-nbctl lb-add lb2-no-aff [[fd12::a]]:8081 [[fd12::2]]:80,[[fd12::3]]:80 ++ovn-nbctl lb-add lb20-no-aff [[fd12::2a]]:8081 [[fd12::2]]:80,[[fd12::3]]:80 +ovn-nbctl --wait=sb set load_balancer lb2 options:affinity_timeout=60 +ovn-nbctl --wait=sb set load_balancer lb20 options:affinity_timeout=60 +ovn-nbctl ls-lb-add foo lb2 +ovn-nbctl ls-lb-add foo lb20 ++ovn-nbctl ls-lb-add foo lb2-no-aff ++ovn-nbctl ls-lb-add foo lb20-no-aff + +for i in $(seq 1 15); do + echo Request $i @@ -4735,15 +5945,20 @@ index 8acfb3e39..cb3412717 100644 +sed -e 's/zone=[[0-9]]*/zone=/; s/src=fd12::[[0-9]]/src=fd12::/'], [0], [dnl +tcp,orig=(src=fd11::2,dst=fd12::a,sport=,dport=),reply=(src=fd12::,dst=fd11::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([foo1], [nc -z fd12::a 8081]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +ovn-nbctl lb-add lb3 [[fd12::b]]:8080 [[fd12::2]]:80,[[fd12::3]]:80 +ovn-nbctl lb-add lb30 [[fd12::3b]]:8080 [[fd12::2]]:80,[[fd12::3]]:80 ++ovn-nbctl lb-add lb3-no-aff [[fd12::b]]:8081 [[fd12::2]]:80,[[fd12::3]]:80 ++ovn-nbctl lb-add lb30-no-aff [[fd12::3b]]:8081 [[fd12::2]]:80,[[fd12::3]]:80 +ovn-nbctl --wait=sb set load_balancer lb3 options:affinity_timeout=3 +ovn-nbctl --wait=sb set load_balancer lb30 options:affinity_timeout=3 +ovn-nbctl ls-lb-add foo lb3 +ovn-nbctl ls-lb-add foo lb30 ++ovn-nbctl ls-lb-add foo lb3-no-aff ++ovn-nbctl ls-lb-add foo lb30-no-aff + +for i in $(seq 1 15); do + echo Request $i @@ -4756,16 +5971,23 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=fd11::2,dst=fd12::b,sport=,dport=),reply=(src=fd12::2,dst=fd11::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +tcp,orig=(src=fd11::2,dst=fd12::b,sport=,dport=),reply=(src=fd12::3,dst=fd11::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +]) ++NS_CHECK_EXEC([foo1], [nc -z fd12::b 8081]) + +NS_CHECK_EXEC([foo1], [ip -6 neigh add fd11::b lladdr 00:00:01:01:02:03 dev foo1], [0]) +ovn-nbctl --wait=sb lb-add lb4 [[fd11::a]]:8080 [[fd11::2]]:80 +ovn-nbctl --wait=sb lb-add lb40 [[fd11::a]]:8080 [[fd11::2]]:80 ++ovn-nbctl --wait=sb lb-add lb4-no-aff [[fd11::a]]:8081 [[fd11::2]]:80 ++ovn-nbctl --wait=sb lb-add lb40-no-aff [[fd11::a]]:8081 [[fd11::2]]:80 +ovn-nbctl --wait=sb set load_balancer lb4 options:affinity_timeout=60 options:hairpin_snat_ip="fd11::b" +ovn-nbctl --wait=sb set load_balancer lb40 options:affinity_timeout=60 options:hairpin_snat_ip="fd11::b" +ovn-nbctl ls-lb-add foo lb4 +ovn-nbctl ls-lb-add foo lb40 +ovn-nbctl lr-lb-add R1 lb4 +ovn-nbctl lr-lb-add R1 lb40 ++ovn-nbctl ls-lb-add foo lb4-no-aff ++ovn-nbctl ls-lb-add foo lb40-no-aff ++ovn-nbctl lr-lb-add R1 lb4-no-aff ++ovn-nbctl lr-lb-add R1 lb40-no-aff + +# Flush conntrack entries for easier output parsing of next test. +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) @@ -4782,6 +6004,7 @@ index 8acfb3e39..cb3412717 100644 +tcp,orig=(src=fd11::2,dst=fd11::a,sport=,dport=),reply=(src=fd11::2,dst=fd11::2,sport=,dport=),zone=,mark=2,protoinfo=(state=) +tcp,orig=(src=fd11::b,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd11::b,sport=,dport=),zone=,protoinfo=(state=) +]) ++NS_CHECK_EXEC([foo1], [nc -z fd11::a 8081]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + @@ -4917,11 +6140,103 @@ index 8acfb3e39..cb3412717 100644 +/connection dropped.*/d"]) +AT_CLEANUP +]) +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 3bbdbd998..2f8ec4348 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -4421,6 +4421,8 @@ nbctl_pre_lr_route_del(struct ctl_context *ctx) + + ovsdb_idl_add_column(ctx->idl, + &nbrec_logical_router_static_route_col_policy); ++ ovsdb_idl_add_column(ctx->idl, ++ &nbrec_logical_router_static_route_col_bfd); + ovsdb_idl_add_column(ctx->idl, + &nbrec_logical_router_static_route_col_ip_prefix); + ovsdb_idl_add_column(ctx->idl, +@@ -4433,7 +4435,7 @@ nbctl_pre_lr_route_del(struct ctl_context *ctx) + } + + static void +-nbctl_lr_route_del(struct ctl_context *ctx) ++ nbctl_lr_route_del(struct ctl_context *ctx) + { + const struct nbrec_logical_router *lr; + char *error = lr_by_name_or_uuid(ctx, ctx->argv[1], true, &lr); +@@ -4550,6 +4552,10 @@ nbctl_lr_route_del(struct ctl_context *ctx) + } + + /* Everything matched. Removing. */ ++ if (lr->static_routes[i]->bfd) { ++ nbrec_bfd_delete(lr->static_routes[i]->bfd); ++ } ++ + nbrec_logical_router_update_static_routes_delvalue( + lr, lr->static_routes[i]); + n_removed++; diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index d9e7129d9..9b728ebd9 100644 +index d9e7129d9..858f481fc 100644 --- a/utilities/ovn-trace.c +++ b/utilities/ovn-trace.c -@@ -3298,6 +3298,10 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, +@@ -60,6 +60,9 @@ static char *unixctl_path; + /* The southbound database. */ + static struct ovsdb_idl *ovnsb_idl; + ++/* --leader-only, --no-leader-only: Only accept the leader in a cluster. */ ++static int leader_only = true; ++ + /* --detailed: Show a detailed, table-by-table trace. */ + static bool detailed; + +@@ -138,6 +141,7 @@ main(int argc, char *argv[]) + 1, INT_MAX, ovntrace_trace, NULL); + } + ovnsb_idl = ovsdb_idl_create(db, &sbrec_idl_class, true, false); ++ ovsdb_idl_set_leader_only(ovnsb_idl, leader_only); + + bool already_read = false; + for (;;) { +@@ -243,6 +247,8 @@ parse_options(int argc, char *argv[]) + { + enum { + OPT_DB = UCHAR_MAX + 1, ++ OPT_LEADER_ONLY, ++ OPT_NO_LEADER_ONLY, + OPT_UNIXCTL, + OPT_DETAILED, + OPT_SUMMARY, +@@ -260,6 +266,8 @@ parse_options(int argc, char *argv[]) + }; + static const struct option long_options[] = { + {"db", required_argument, NULL, OPT_DB}, ++ {"leader-only", no_argument, NULL, OPT_LEADER_ONLY}, ++ {"no-leader-only", no_argument, NULL, OPT_NO_LEADER_ONLY}, + {"unixctl", required_argument, NULL, OPT_UNIXCTL}, + {"detailed", no_argument, NULL, OPT_DETAILED}, + {"summary", no_argument, NULL, OPT_SUMMARY}, +@@ -294,6 +302,14 @@ parse_options(int argc, char *argv[]) + db = optarg; + break; + ++ case OPT_LEADER_ONLY: ++ leader_only = true; ++ break; ++ ++ case OPT_NO_LEADER_ONLY: ++ leader_only = false; ++ break; ++ + case OPT_UNIXCTL: + unixctl_path = optarg; + break; +@@ -390,6 +406,7 @@ Output style options:\n\ + Other options:\n\ + --db=DATABASE connect to DATABASE\n\ + (default: %s)\n\ ++ --no-leader-only accept any cluster member, not just the leader\n\ + --ovs[=REMOTE] obtain corresponding OpenFlow flows from REMOTE\n\ + (default: %s)\n\ + --unixctl=SOCKET set control socket name\n\ +@@ -3298,6 +3315,10 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, break; case OVNACT_CHK_ECMP_NH: break; diff --git a/SPECS/ovn22.09.spec b/SPECS/ovn22.09.spec index fdf4bfa..cc34c55 100644 --- a/SPECS/ovn22.09.spec +++ b/SPECS/ovn22.09.spec @@ -51,7 +51,7 @@ Summary: Open Virtual Network support Group: System Environment/Daemons URL: http://www.ovn.org/ Version: 22.09.0 -Release: 22%{?commit0:.%{date}git%{shortcommit0}}%{?dist} +Release: 31%{?commit0:.%{date}git%{shortcommit0}}%{?dist} Provides: openvswitch%{pkgver}-ovn-common = %{?epoch:%{epoch}:}%{version}-%{release} Obsoletes: openvswitch%{pkgver}-ovn-common < 2.11.0-1 @@ -62,8 +62,8 @@ License: ASL 2.0 and LGPLv2+ and SISSL # Always pull an upstream release, since this is what we rebase to. Source: https://github.com/ovn-org/ovn/archive/v%{version}.tar.gz#/ovn-%{version}.tar.gz -%define ovscommit c9c602b6f332c6e76b273c607366185cf28ed156 -%define ovsshortcommit c9c602b +%define ovscommit 2410b95597fcec5f733caf77febdb46f4ffacd27 +%define ovsshortcommit 2410b95 Source10: https://github.com/openvswitch/ovs/archive/%{ovscommit}.tar.gz#/openvswitch-%{ovsshortcommit}.tar.gz %define ovsdir ovs-%{ovscommit} @@ -524,6 +524,42 @@ fi %{_unitdir}/ovn-controller-vtep.service %changelog +* Fri Jan 13 2023 Mark Michelson - 22.09.0-31 +- Merging from branch-22.09 to ovn22.09-lb-affinity +[Upstream: 75e5bb9272fb7e3a867b51fff2f524ca50f53b03] + +* Thu Jan 12 2023 Mark Michelson - 22.09.0-30 +- Merging from branch-22.09 to ovn22.09-lb-affinity +[Upstream: f73dd8e3018cfb7d6ee5cb29a5f2a05927541421] + +* Wed Dec 21 2022 Mark Michelson - 22.09.0-29 +- Merging from branch-22.09 to ovn22.09-lb-affinity +[Upstream: 548638a08d6c5927eb3aad93870af36f58da34a7] + +* Mon Dec 12 2022 Mark Michelson - 22.09.0-28 +- Merging from branch-22.09 to ovn22.09-lb-affinity +[Upstream: d6510560a43a7323d33a1d44f4386b7df846f978] + +* Fri Dec 09 2022 Mark Michelson - 22.09.0-27 +- Merging from branch-22.09 to ovn22.09-lb-affinity +[Upstream: 2a7c712e3bb14cdfd89fc5d21c11b567b0855f2a] + +* Thu Dec 08 2022 Mark Michelson - 22.09.0-26 +- Merging from branch-22.09 to ovn22.09-lb-affinity +[Upstream: c70ad426f41a0ad2799dac0117c4b70ec3ebcd4a] + +* Tue Dec 06 2022 Mark Michelson - 22.09.0-25 +- ovn-northd.at: Fix failing lb-affinity test. +[Upstream: 60b856cfd55b8d636c5f8c1011781f587efe7cf5] + +* Tue Dec 06 2022 Dumitru Ceara - 22.09.0-24 +- northd: Include VIP port in LB affinity learn flow matches. (#2150533) +[Upstream: cc037c7538d635e7d014e98935a83bc15140674f] + +* Tue Dec 06 2022 Ales Musil - 22.09.0-23 +- northd: Improve the LB affinity code +[Upstream: 11da2339668a024a05512ba2178046135b784825] + * Tue Nov 29 2022 Lorenzo Bianconi - 22.09.0-22 - northd: rely on new actions for lb affinity [Upstream: 5b6223dcb6060205c6e9d4e8c092e96134bb032a]