diff --git a/.ovn.metadata b/.ovn.metadata
index 4bcfed1..3c2a0bd 100644
--- a/.ovn.metadata
+++ b/.ovn.metadata
@@ -1,5 +1,5 @@
002450621b33c5690060345b0aac25bc2426d675 SOURCES/docutils-0.12.tar.gz
-838279b54706cbb447491f422d829569c1c90b73 SOURCES/openvswitch-498cedc.tar.gz
+3141cf2ef8fc5066ae4d7f128e105a293c81549a SOURCES/openvswitch-45ecaa9.tar.gz
347346dae160f28d6e56b1dee8fa8b701a50748e SOURCES/ovn-21.12.0.tar.gz
d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz
6beb30f18ffac3de7689b7fd63e9a8a7d9c8df3a SOURCES/Sphinx-1.1.3.tar.gz
diff --git a/SOURCES/ovn-2021.patch b/SOURCES/ovn-2021.patch
index 9f922c1..d1ba099 100644
--- a/SOURCES/ovn-2021.patch
+++ b/SOURCES/ovn-2021.patch
@@ -15,7 +15,7 @@ index 37e8d4250..e0c528479 100755
+pip3 install --disable-pip-version-check --user \
+ flake8 'hacking>=3.0' sphinx setuptools pyelftools pyOpenSSL
diff --git a/.ci/ovn-kubernetes/Dockerfile b/.ci/ovn-kubernetes/Dockerfile
-index 9cfc32f62..495ffc8be 100644
+index 9cfc32f62..2439c61f7 100644
--- a/.ci/ovn-kubernetes/Dockerfile
+++ b/.ci/ovn-kubernetes/Dockerfile
@@ -1,4 +1,5 @@
@@ -24,7 +24,7 @@ index 9cfc32f62..495ffc8be 100644
FROM fedora:33 AS ovnbuilder
-@@ -38,11 +39,21 @@ RUN rm rpm/rpmbuild/RPMS/x86_64/*docker*
+@@ -38,18 +39,28 @@ RUN rm rpm/rpmbuild/RPMS/x86_64/*docker*
# Build ovn-kubernetes
FROM golang:1.16 as ovnkubebuilder
ARG OVNKUBE_COMMIT
@@ -47,6 +47,14 @@ index 9cfc32f62..495ffc8be 100644
# Build the final image
FROM fedora:33
+
+ # install needed dependencies
+ RUN INSTALL_PKGS=" \
+- iptables iproute iputils hostname unbound-libs kubernetes-client kmod" && \
++ iptables iproute iputils hostname unbound-libs kubernetes-client kmod socat" && \
+ dnf install --best --refresh -y --setopt=tsflags=nodocs $INSTALL_PKGS && \
+ dnf clean all && rm -rf /var/cache/dnf/*
+
diff --git a/.github/workflows/ovn-kubernetes.yml b/.github/workflows/ovn-kubernetes.yml
index 60c585a24..c05bbd3f9 100644
--- a/.github/workflows/ovn-kubernetes.yml
@@ -109,14 +117,20 @@ index 064725f68..40f36d815 100644
Terry Wilson twilson@redhat.com
Tetsuo NAKAGAWA nakagawa@mxc.nes.nec.co.jp
diff --git a/NEWS b/NEWS
-index 75f26ddb7..3b3104c2f 100644
+index 75f26ddb7..31e08b015 100644
--- a/NEWS
+++ b/NEWS
-@@ -1,3 +1,12 @@
-+OVN v21.12.2 - xx xxx xxxx
+@@ -1,3 +1,18 @@
++OVN v21.12.3 - xx xxx xxxx
++--------------------------
++
++OVN v21.12.2 - 03 Jun 2022
+--------------------------
++ - Bug fixes
+ - When configured to log packets matching ACLs, log the direction (logical
+ pipeline) too.
++ - Replaced the usage of masked ct_label by ct_mark in most cases to work
++ better with hardware-offloading.
+
+OVN v21.12.1 - 11 Mar 2022
+--------------------------
@@ -126,7 +140,7 @@ index 75f26ddb7..3b3104c2f 100644
--------------------------
- Set ignore_lsp_down to true as default, so that ARP responder flows are
diff --git a/configure.ac b/configure.ac
-index 48b4662f0..c4bf08db7 100644
+index 48b4662f0..f37afb7bb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
@@ -134,7 +148,7 @@ index 48b4662f0..c4bf08db7 100644
AC_PREREQ(2.63)
-AC_INIT(ovn, 21.12.0, bugs@openvswitch.org)
-+AC_INIT(ovn, 21.12.2, bugs@openvswitch.org)
++AC_INIT(ovn, 21.12.3, bugs@openvswitch.org)
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_HEADERS([config.h])
@@ -182,7 +196,7 @@ index 288772dc4..9fbfc0337 100644
if (gw_node) {
VLOG_WARN("Chassis for VTEP physical switch (%s) disappears, "
diff --git a/controller/binding.c b/controller/binding.c
-index 4d62b0858..1259e6b3b 100644
+index 4d62b0858..c40751465 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -481,6 +481,16 @@ remove_related_lport(const struct sbrec_port_binding *pb,
@@ -202,7 +216,34 @@ index 4d62b0858..1259e6b3b 100644
static void
update_active_pb_ras_pd(const struct sbrec_port_binding *pb,
struct hmap *local_datapaths,
-@@ -2251,6 +2261,9 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in,
+@@ -898,7 +908,9 @@ claimed_lport_set_up(const struct sbrec_port_binding *pb,
+ if (!notify_up) {
+ bool up = true;
+ if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
+- sbrec_port_binding_set_up(pb, &up, 1);
++ if (pb->n_up) {
++ sbrec_port_binding_set_up(pb, &up, 1);
++ }
+ }
+ return;
+ }
+@@ -2049,6 +2061,15 @@ handle_deleted_lport(const struct sbrec_port_binding *pb,
+ return;
+ }
+
++ /*
++ * Remove localport that was part of local datapath that is not
++ * considered to be local anymore.
++ */
++ if (!ld && !strcmp(pb->type, "localport") &&
++ sset_find(&b_ctx_out->related_lports->lport_names, pb->logical_port)) {
++ remove_related_lport(pb, b_ctx_out);
++ }
++
+ /* If the binding is not local, if 'pb' is a L3 gateway port, we should
+ * remove its peer, if that one is local.
+ */
+@@ -2251,6 +2272,9 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in,
continue;
}
@@ -212,6 +253,31 @@ index 4d62b0858..1259e6b3b 100644
enum en_lport_type lport_type = get_lport_type(pb);
struct binding_lport *b_lport =
+diff --git a/controller/chassis.c b/controller/chassis.c
+index 8a1559653..92850fcc1 100644
+--- a/controller/chassis.c
++++ b/controller/chassis.c
+@@ -350,6 +350,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg *ovs_cfg,
+ smap_replace(config, "is-interconn",
+ ovs_cfg->is_interconn ? "true" : "false");
+ smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true");
++ smap_replace(config, OVN_FEATURE_CT_NO_MASKED_LABEL, "true");
+ }
+
+ /*
+@@ -455,6 +456,12 @@ chassis_other_config_changed(const struct ovs_chassis_cfg *ovs_cfg,
+ return true;
+ }
+
++ if (!smap_get_bool(&chassis_rec->other_config,
++ OVN_FEATURE_CT_NO_MASKED_LABEL,
++ false)) {
++ return true;
++ }
++
+ return false;
+ }
+
diff --git a/controller/lflow-conj-ids.c b/controller/lflow-conj-ids.c
index bfe63862a..6d3601237 100644
--- a/controller/lflow-conj-ids.c
@@ -571,10 +637,42 @@ index 6da0a612c..b53e570f2 100644
#endif /* controller/lflow-conj-ids.h */
diff --git a/controller/lflow.c b/controller/lflow.c
-index 933e2f3cc..489347dae 100644
+index 933e2f3cc..7abd24c42 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
-@@ -852,6 +852,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow,
+@@ -580,6 +580,23 @@ lflow_parse_ctrl_meter(const struct sbrec_logical_flow *lflow,
+ }
+ }
+
++static int
++get_common_nat_zone(const struct sbrec_datapath_binding *dp)
++{
++ /* Normally, the common NAT zone defaults to the DNAT zone. However,
++ * if the "snat-ct-zone" is set on the datapath, the user is
++ * expecting an explicit CT zone to be used for SNAT. If we default
++ * to the DNAT zone, then it means SNAT will not use the configured
++ * value. The way we get around this is to use the SNAT zone as the
++ * common zone if "snat-ct-zone" is set.
++ */
++ if (smap_get(&dp->external_ids, "snat-ct-zone")) {
++ return MFF_LOG_SNAT_ZONE;
++ } else {
++ return MFF_LOG_DNAT_ZONE;
++ }
++}
++
+ static void
+ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
+ const struct sbrec_datapath_binding *dp,
+@@ -629,6 +646,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
+ .fdb_ptable = OFTABLE_GET_FDB,
+ .fdb_lookup_ptable = OFTABLE_LOOKUP_FDB,
+ .ctrl_meter_id = ctrl_meter_id,
++ .common_nat_ct_zone = get_common_nat_zone(dp),
+ };
+ ovnacts_encode(ovnacts->data, ovnacts->size, &ep, &ofpacts);
+
+@@ -852,6 +870,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow,
&& lcv->n_conjs
&& !lflow_conj_ids_alloc_specified(l_ctx_out->conj_ids,
&lflow->header_.uuid,
@@ -582,7 +680,7 @@ index 933e2f3cc..489347dae 100644
lcv->conj_id_ofs, lcv->n_conjs)) {
/* This should happen very rarely. */
VLOG_DBG("lflow "UUID_FMT" match cached with conjunctions, but the"
-@@ -915,6 +916,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow,
+@@ -915,6 +934,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow,
if (n_conjs) {
start_conj_id = lflow_conj_ids_alloc(l_ctx_out->conj_ids,
&lflow->header_.uuid,
@@ -590,8 +688,154 @@ index 933e2f3cc..489347dae 100644
n_conjs);
if (!start_conj_id) {
VLOG_ERR("32-bit conjunction ids exhausted!");
+@@ -1302,6 +1322,7 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
+ struct ovn_lb_vip *lb_vip,
+ struct ovn_lb_backend *lb_backend,
+ uint8_t lb_proto,
++ bool use_ct_mark,
+ struct ovn_desired_flow_table *flow_table)
+ {
+ uint64_t stub[1024 / 8];
+@@ -1392,15 +1413,30 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
+ * - packets must have ip.src == ip.dst at this point.
+ * - the destination protocol and port must be of a valid backend that
+ * has the same IP as ip.dst.
++ *
++ * During upgrades logical flows might still use the old way of storing
++ * ct.natted in ct_label. For backwards compatibility, only use ct_mark
++ * if ovn-northd notified ovn-controller to do that.
+ */
+- ovs_u128 lb_ct_label = {
+- .u64.lo = OVN_CT_NATTED,
+- };
+- match_set_ct_label_masked(&hairpin_match, lb_ct_label, lb_ct_label);
++ if (use_ct_mark) {
++ uint32_t lb_ct_mark = OVN_CT_NATTED;
++ match_set_ct_mark_masked(&hairpin_match, lb_ct_mark, lb_ct_mark);
++
++ ofctrl_add_flow(flow_table, OFTABLE_CHK_LB_HAIRPIN, 100,
++ lb->slb->header_.uuid.parts[0], &hairpin_match,
++ &ofpacts, &lb->slb->header_.uuid);
++ } else {
++ match_set_ct_mark_masked(&hairpin_match, 0, 0);
++ ovs_u128 lb_ct_label = {
++ .u64.lo = OVN_CT_NATTED,
++ };
++ match_set_ct_label_masked(&hairpin_match, lb_ct_label, lb_ct_label);
++
++ ofctrl_add_flow(flow_table, OFTABLE_CHK_LB_HAIRPIN, 100,
++ lb->slb->header_.uuid.parts[0], &hairpin_match,
++ &ofpacts, &lb->slb->header_.uuid);
++ }
+
+- ofctrl_add_flow(flow_table, OFTABLE_CHK_LB_HAIRPIN, 100,
+- lb->slb->header_.uuid.parts[0], &hairpin_match,
+- &ofpacts, &lb->slb->header_.uuid);
+ ofpbuf_uninit(&ofpacts);
+ }
+
+@@ -1673,6 +1709,7 @@ add_lb_ct_snat_hairpin_flows(struct ovn_controller_lb *lb,
+ static void
+ consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
+ const struct hmap *local_datapaths,
++ bool use_ct_mark,
+ struct ovn_desired_flow_table *flow_table,
+ struct simap *ids)
+ {
+@@ -1712,7 +1749,7 @@ consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
+ struct ovn_lb_backend *lb_backend = &lb_vip->backends[j];
+
+ add_lb_vip_hairpin_flows(lb, lb_vip, lb_backend, lb_proto,
+- flow_table);
++ use_ct_mark, flow_table);
+ }
+ }
+
+@@ -1725,7 +1762,7 @@ consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
+ * backends to handle the load balanced hairpin traffic. */
+ static void
+ add_lb_hairpin_flows(const struct sbrec_load_balancer_table *lb_table,
+- const struct hmap *local_datapaths,
++ const struct hmap *local_datapaths, bool use_ct_mark,
+ struct ovn_desired_flow_table *flow_table,
+ struct simap *ids,
+ struct id_pool *pool)
+@@ -1748,7 +1785,8 @@ add_lb_hairpin_flows(const struct sbrec_load_balancer_table *lb_table,
+ ovs_assert(id_pool_alloc_id(pool, &id));
+ simap_put(ids, lb->name, id);
+ }
+- consider_lb_hairpin_flows(lb, local_datapaths, flow_table, ids);
++ consider_lb_hairpin_flows(lb, local_datapaths, use_ct_mark,
++ flow_table, ids);
+ }
+ }
+
+@@ -1854,6 +1892,7 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out)
+ l_ctx_in->mac_binding_table, l_ctx_in->local_datapaths,
+ l_ctx_out->flow_table);
+ add_lb_hairpin_flows(l_ctx_in->lb_table, l_ctx_in->local_datapaths,
++ l_ctx_in->lb_hairpin_use_ct_mark,
+ l_ctx_out->flow_table,
+ l_ctx_out->hairpin_lb_ids,
+ l_ctx_out->hairpin_id_pool);
+@@ -1967,6 +2006,18 @@ lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *dp,
+ }
+ sbrec_fdb_index_destroy_row(fdb_index_row);
+
++ struct sbrec_mac_binding *mb_index_row = sbrec_mac_binding_index_init_row(
++ l_ctx_in->sbrec_mac_binding_by_datapath);
++ sbrec_mac_binding_index_set_datapath(mb_index_row, dp);
++ const struct sbrec_mac_binding *mb;
++ SBREC_MAC_BINDING_FOR_EACH_EQUAL (
++ mb, mb_index_row, l_ctx_in->sbrec_mac_binding_by_datapath) {
++ consider_neighbor_flow(l_ctx_in->sbrec_port_binding_by_name,
++ l_ctx_in->local_datapaths,
++ mb, l_ctx_out->flow_table);
++ }
++ sbrec_mac_binding_index_destroy_row(mb_index_row);
++
+ dhcp_opts_destroy(&dhcp_opts);
+ dhcp_opts_destroy(&dhcpv6_opts);
+ nd_ra_opts_destroy(&nd_ra_opts);
+@@ -1976,6 +2027,7 @@ lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *dp,
+ * associated. */
+ for (size_t i = 0; i < n_dp_lbs; i++) {
+ consider_lb_hairpin_flows(dp_lbs[i], l_ctx_in->local_datapaths,
++ l_ctx_in->lb_hairpin_use_ct_mark,
+ l_ctx_out->flow_table,
+ l_ctx_out->hairpin_lb_ids);
+ }
+@@ -2089,6 +2141,7 @@ lflow_handle_changed_lbs(struct lflow_ctx_in *l_ctx_in,
+ VLOG_DBG("Add load balancer hairpin flows for "UUID_FMT,
+ UUID_ARGS(&lb->header_.uuid));
+ consider_lb_hairpin_flows(lb, l_ctx_in->local_datapaths,
++ l_ctx_in->lb_hairpin_use_ct_mark,
+ l_ctx_out->flow_table,
+ l_ctx_out->hairpin_lb_ids);
+ }
+diff --git a/controller/lflow.h b/controller/lflow.h
+index 489dd70fb..569eecedc 100644
+--- a/controller/lflow.h
++++ b/controller/lflow.h
+@@ -133,6 +133,8 @@ struct lflow_ctx_in {
+ struct ovsdb_idl_index *sbrec_logical_flow_by_logical_dp_group;
+ struct ovsdb_idl_index *sbrec_port_binding_by_name;
+ struct ovsdb_idl_index *sbrec_fdb_by_dp_key;
++ struct ovsdb_idl_index *sbrec_mac_binding_by_datapath;
++ struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath;
+ const struct sbrec_port_binding_table *port_binding_table;
+ const struct sbrec_dhcp_options_table *dhcp_options_table;
+ const struct sbrec_dhcpv6_options_table *dhcpv6_options_table;
+@@ -150,6 +152,7 @@ struct lflow_ctx_in {
+ const struct sset *active_tunnels;
+ const struct sset *related_lport_ids;
+ const struct hmap *chassis_tunnels;
++ bool lb_hairpin_use_ct_mark;
+ };
+
+ struct lflow_ctx_out {
diff --git a/controller/ofctrl.c b/controller/ofctrl.c
-index 08fcfed8b..8d958faf1 100644
+index 08fcfed8b..c0a04ec50 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -335,6 +335,22 @@ static struct ovn_extend_table *groups;
@@ -641,7 +885,21 @@ index 08fcfed8b..8d958faf1 100644
}
uint64_t
-@@ -1802,26 +1821,14 @@ add_meter_string(struct ovn_extend_table_info *m_desired,
+@@ -899,7 +918,12 @@ link_installed_to_desired(struct installed_flow *i, struct desired_flow *d)
+ break;
+ }
+ }
+- ovs_list_insert(&f->installed_ref_list_node, &d->installed_ref_list_node);
++ if (!f) {
++ ovs_list_insert(&i->desired_refs, &d->installed_ref_list_node);
++ } else {
++ ovs_list_insert(&f->installed_ref_list_node,
++ &d->installed_ref_list_node);
++ }
+ d->installed_flow = i;
+ return installed_flow_get_active(i) == d;
+ }
+@@ -1802,26 +1826,14 @@ add_meter_string(struct ovn_extend_table_info *m_desired,
}
static void
@@ -673,7 +931,7 @@ index 08fcfed8b..8d958faf1 100644
mm.meter.flags = OFPMF13_STATS;
if (!strcmp(sb_meter->unit, "pktps")) {
-@@ -1854,6 +1861,152 @@ add_meter(struct ovn_extend_table_info *m_desired,
+@@ -1854,6 +1866,152 @@ add_meter(struct ovn_extend_table_info *m_desired,
free(mm.meter.bands);
}
@@ -826,7 +1084,7 @@ index 08fcfed8b..8d958faf1 100644
static void
installed_flow_add(struct ovn_flow *d,
struct ofputil_bundle_ctrl_msg *bc,
-@@ -1994,7 +2147,20 @@ deleted_flow_lookup(struct hmap *deleted_flows, struct ovn_flow *target)
+@@ -1994,7 +2152,20 @@ deleted_flow_lookup(struct hmap *deleted_flows, struct ovn_flow *target)
&& f->cookie == target->cookie
&& ofpacts_equal(f->ofpacts, f->ofpacts_len, target->ofpacts,
target->ofpacts_len)) {
@@ -848,7 +1106,7 @@ index 08fcfed8b..8d958faf1 100644
}
}
return NULL;
-@@ -2023,10 +2189,6 @@ merge_tracked_flows(struct ovn_desired_flow_table *flow_table)
+@@ -2023,10 +2194,6 @@ merge_tracked_flows(struct ovn_desired_flow_table *flow_table)
continue;
}
@@ -859,7 +1117,7 @@ index 08fcfed8b..8d958faf1 100644
if (!f->installed_flow) {
/* f is not installed yet. */
replace_installed_to_desired(del_f->installed_flow, del_f, f);
-@@ -2232,13 +2394,19 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
+@@ -2232,13 +2399,19 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
/* Iterate through all the desired meters. If there are new ones,
* add them to the switch. */
struct ovn_extend_table_info *m_desired;
@@ -885,7 +1143,7 @@ index 08fcfed8b..8d958faf1 100644
}
}
-@@ -2328,12 +2496,14 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
+@@ -2328,12 +2501,14 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
struct ovn_extend_table_info *m_installed, *next_meter;
EXTEND_TABLE_FOR_EACH_INSTALLED (m_installed, next_meter, meters) {
/* Delete the meter. */
@@ -907,10 +1165,52 @@ index 08fcfed8b..8d958faf1 100644
}
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
-index 5069aedfc..f85af9353 100644
+index 5069aedfc..f5d749a2f 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
-@@ -962,7 +962,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
+@@ -131,6 +131,9 @@ static const char *ssl_ca_cert_file;
+ #define DEFAULT_LFLOW_CACHE_WMARK_PERC 50
+ #define DEFAULT_LFLOW_CACHE_TRIM_TO_MS 30000
+
++/* SB Global options defaults. */
++#define DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK false
++
+ struct controller_engine_ctx {
+ struct lflow_cache *lflow_cache;
+ struct if_status_mgr *if_mgr;
+@@ -813,13 +816,18 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table,
+ }
+
+ const char *user = node->key + 8;
+- int zone = atoi(node->value);
++ if (!user[0]) {
++ continue;
++ }
+
+- if (user[0] && zone) {
+- VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, user);
+- bitmap_set1(ct_zone_bitmap, zone);
+- simap_put(ct_zones, user, zone);
++ unsigned int zone;
++ if (!str_to_uint(node->value, 10, &zone)) {
++ continue;
+ }
++
++ VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, user);
++ bitmap_set1(ct_zone_bitmap, zone);
++ simap_put(ct_zones, user, zone);
+ }
+ }
+
+@@ -948,6 +956,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
+ }
+
+ #define SB_NODES \
++ SB_NODE(sb_global, "sb_global") \
+ SB_NODE(chassis, "chassis") \
+ SB_NODE(encap, "encap") \
+ SB_NODE(address_set, "address_set") \
+@@ -962,7 +971,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
SB_NODE(dhcpv6_options, "dhcpv6_options") \
SB_NODE(dns, "dns") \
SB_NODE(load_balancer, "load_balancer") \
@@ -920,7 +1220,109 @@ index 5069aedfc..f85af9353 100644
enum sb_engine_node {
#define SB_NODE(NAME, NAME_STR) SB_##NAME,
-@@ -2713,6 +2714,26 @@ lflow_output_sb_fdb_handler(struct engine_node *node, void *data)
+@@ -2155,6 +2165,63 @@ non_vif_data_ovs_iface_handler(struct engine_node *node, void *data OVS_UNUSED)
+ return local_nonvif_data_handle_ovs_iface_changes(iface_table);
+ }
+
++struct ed_type_northd_options {
++ bool lb_hairpin_use_ct_mark;
++};
++
++
++static void *
++en_northd_options_init(struct engine_node *node OVS_UNUSED,
++ struct engine_arg *arg OVS_UNUSED)
++{
++ struct ed_type_northd_options *n_opts = xzalloc(sizeof *n_opts);
++ return n_opts;
++}
++
++static void
++en_northd_options_cleanup(void *data OVS_UNUSED)
++{
++}
++
++static void
++en_northd_options_run(struct engine_node *node, void *data)
++{
++ struct ed_type_northd_options *n_opts = data;
++ const struct sbrec_sb_global_table *sb_global_table =
++ EN_OVSDB_GET(engine_get_input("SB_sb_global", node));
++ const struct sbrec_sb_global *sb_global =
++ sbrec_sb_global_table_first(sb_global_table);
++
++ n_opts->lb_hairpin_use_ct_mark =
++ sb_global
++ ? smap_get_bool(&sb_global->options, "lb_hairpin_use_ct_mark",
++ DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK)
++ : DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK;
++ engine_set_node_state(node, EN_UPDATED);
++}
++
++static bool
++en_northd_options_sb_sb_global_handler(struct engine_node *node, void *data)
++{
++ struct ed_type_northd_options *n_opts = data;
++ const struct sbrec_sb_global_table *sb_global_table =
++ EN_OVSDB_GET(engine_get_input("SB_sb_global", node));
++ const struct sbrec_sb_global *sb_global =
++ sbrec_sb_global_table_first(sb_global_table);
++
++ bool lb_hairpin_use_ct_mark =
++ sb_global
++ ? smap_get_bool(&sb_global->options, "lb_hairpin_use_ct_mark",
++ DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK)
++ : DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK;
++
++ if (lb_hairpin_use_ct_mark != n_opts->lb_hairpin_use_ct_mark) {
++ n_opts->lb_hairpin_use_ct_mark = lb_hairpin_use_ct_mark;
++ engine_set_node_state(node, EN_UPDATED);
++ }
++ return true;
++}
++
+ struct lflow_output_persistent_data {
+ struct lflow_cache *lflow_cache;
+ };
+@@ -2217,6 +2284,11 @@ init_lflow_ctx(struct engine_node *node,
+ engine_get_input("SB_fdb", node),
+ "dp_key");
+
++ struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
++ engine_ovsdb_node_get_index(
++ engine_get_input("SB_mac_binding", node),
++ "datapath");
++
+ struct sbrec_port_binding_table *port_binding_table =
+ (struct sbrec_port_binding_table *)EN_OVSDB_GET(
+ engine_get_input("SB_port_binding", node));
+@@ -2277,6 +2349,9 @@ init_lflow_ctx(struct engine_node *node,
+ engine_get_input_data("port_groups", node);
+ struct shash *port_groups = &pg_data->port_groups_cs_local;
+
++ struct ed_type_northd_options *n_opts =
++ engine_get_input_data("northd_options", node);
++
+ l_ctx_in->sbrec_multicast_group_by_name_datapath =
+ sbrec_mc_group_by_name_dp;
+ l_ctx_in->sbrec_logical_flow_by_logical_datapath =
+@@ -2285,6 +2360,7 @@ init_lflow_ctx(struct engine_node *node,
+ sbrec_logical_flow_by_dp_group;
+ l_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
+ l_ctx_in->sbrec_fdb_by_dp_key = sbrec_fdb_by_dp_key;
++ l_ctx_in->sbrec_mac_binding_by_datapath = sbrec_mac_binding_by_datapath;
+ l_ctx_in->port_binding_table = port_binding_table;
+ l_ctx_in->dhcp_options_table = dhcp_table;
+ l_ctx_in->dhcpv6_options_table = dhcpv6_table;
+@@ -2301,6 +2377,7 @@ init_lflow_ctx(struct engine_node *node,
+ l_ctx_in->active_tunnels = &rt_data->active_tunnels;
+ l_ctx_in->related_lport_ids = &rt_data->related_lports.lport_ids;
+ l_ctx_in->chassis_tunnels = &non_vif_data->chassis_tunnels;
++ l_ctx_in->lb_hairpin_use_ct_mark = n_opts->lb_hairpin_use_ct_mark;
+
+ l_ctx_out->flow_table = &fo->flow_table;
+ l_ctx_out->group_table = &fo->group_table;
+@@ -2713,6 +2790,26 @@ lflow_output_sb_fdb_handler(struct engine_node *node, void *data)
return handled;
}
@@ -947,7 +1349,37 @@ index 5069aedfc..f85af9353 100644
struct ed_type_pflow_output {
/* Desired physical flows. */
struct ovn_desired_flow_table flow_table;
-@@ -3303,6 +3324,8 @@ main(int argc, char *argv[])
+@@ -3147,6 +3244,9 @@ main(int argc, char *argv[])
+ = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+ &sbrec_fdb_col_mac,
+ &sbrec_fdb_col_dp_key);
++ struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
++ = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
++ &sbrec_mac_binding_col_datapath);
+
+ ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
+ ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
+@@ -3217,6 +3317,7 @@ main(int argc, char *argv[])
+ ENGINE_NODE(flow_output, "flow_output");
+ ENGINE_NODE(addr_sets, "addr_sets");
+ ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_groups, "port_groups");
++ ENGINE_NODE(northd_options, "northd_options");
+
+ #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
+ SB_NODES
+@@ -3265,6 +3366,11 @@ main(int argc, char *argv[])
+ engine_add_input(&en_pflow_output, &en_ovs_open_vswitch, NULL);
+ engine_add_input(&en_pflow_output, &en_ovs_bridge, NULL);
+
++ engine_add_input(&en_northd_options, &en_sb_sb_global,
++ en_northd_options_sb_sb_global_handler);
++
++ engine_add_input(&en_lflow_output, &en_northd_options, NULL);
++
+ engine_add_input(&en_lflow_output, &en_addr_sets,
+ lflow_output_addr_sets_handler);
+ engine_add_input(&en_lflow_output, &en_port_groups,
+@@ -3303,6 +3409,8 @@ main(int argc, char *argv[])
lflow_output_sb_load_balancer_handler);
engine_add_input(&en_lflow_output, &en_sb_fdb,
lflow_output_sb_fdb_handler);
@@ -956,11 +1388,58 @@ index 5069aedfc..f85af9353 100644
engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL);
engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL);
+@@ -3363,6 +3471,8 @@ main(int argc, char *argv[])
+ sbrec_datapath_binding_by_key);
+ engine_ovsdb_node_add_index(&en_sb_fdb, "dp_key",
+ sbrec_fdb_by_dp_key);
++ engine_ovsdb_node_add_index(&en_sb_mac_binding, "datapath",
++ sbrec_mac_binding_by_datapath);
+
+ struct ed_type_lflow_output *lflow_output_data =
+ engine_get_internal_data(&en_lflow_output);
diff --git a/controller/physical.c b/controller/physical.c
-index 836fc769a..aa651b876 100644
+index 836fc769a..2f0f87489 100644
--- a/controller/physical.c
+++ b/controller/physical.c
-@@ -1477,10 +1477,12 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+@@ -421,6 +421,12 @@ populate_remote_chassis_macs(const struct sbrec_chassis *my_chassis,
+ char *save_ptr2 = NULL;
+ char *chassis_mac_bridge = strtok_r(token, ":", &save_ptr2);
+ char *chassis_mac_str = strtok_r(NULL, "", &save_ptr2);
++ if (!chassis_mac_str) {
++ VLOG_WARN("Parsing of ovn-chassis-mac-mappings failed for: "
++ "\"%s\", the correct format is \"br-name1:MAC1\".",
++ token);
++ continue;
++ }
+ struct remote_chassis_mac *remote_chassis_mac = NULL;
+ remote_chassis_mac = xmalloc(sizeof *remote_chassis_mac);
+ hmap_insert(&remote_chassis_macs, &remote_chassis_mac->hmap_node,
+@@ -1309,6 +1315,24 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ }
+ }
+
++ /* Table 37, priority 150.
++ * =======================
++ *
++ * Handles packets received from ports of type "localport". These
++ * ports are present on every hypervisor. Traffic that originates at
++ * one should never go over a tunnel to a remote hypervisor,
++ * so resubmit them to table 38 for local delivery. */
++ if (!strcmp(binding->type, "localport")) {
++ ofpbuf_clear(ofpacts_p);
++ put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
++ match_init_catchall(&match);
++ match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0,
++ binding->tunnel_key);
++ match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
++ ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150,
++ binding->header_.uuid.parts[0], &match,
++ ofpacts_p, &binding->header_.uuid);
++ }
+ } else if (!tun && !is_ha_remote) {
+ /* Remote port connected by localnet port */
+ /* Table 38, priority 100.
+@@ -1477,10 +1501,12 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
&remote_ofpacts);
put_resubmit(OFTABLE_CHECK_LOOPBACK, &remote_ofpacts);
@@ -977,8 +1456,41 @@ index 836fc769a..aa651b876 100644
put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
put_resubmit(OFTABLE_CHECK_LOOPBACK, &ofpacts);
} else if (!strcmp(port->type, "chassisredirect")
+@@ -1824,32 +1850,6 @@ physical_run(struct physical_ctx *p_ctx,
+ ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0,
+ &match, &ofpacts, hc_uuid);
+
+- /* Table 37, priority 150.
+- * =======================
+- *
+- * Handles packets received from ports of type "localport". These ports
+- * are present on every hypervisor. Traffic that originates at one should
+- * never go over a tunnel to a remote hypervisor, so resubmit them to table
+- * 38 for local delivery. */
+- match_init_catchall(&match);
+- ofpbuf_clear(&ofpacts);
+- put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
+- const char *localport;
+- SSET_FOR_EACH (localport, p_ctx->local_lports) {
+- /* Iterate over all local logical ports and insert a drop
+- * rule with higher priority for every localport in this
+- * datapath. */
+- const struct sbrec_port_binding *pb = lport_lookup_by_name(
+- p_ctx->sbrec_port_binding_by_name, localport);
+- if (pb && !strcmp(pb->type, "localport")) {
+- match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, pb->tunnel_key);
+- match_set_metadata(&match, htonll(pb->datapath->tunnel_key));
+- ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150,
+- pb->header_.uuid.parts[0],
+- &match, &ofpacts, hc_uuid);
+- }
+- }
+-
+ /* Table 37, Priority 0.
+ * =======================
+ *
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
-index f0667807e..2dd1bc7bd 100644
+index f0667807e..cb9cc096d 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -1624,12 +1624,8 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
@@ -1311,7 +1823,22 @@ index f0667807e..2dd1bc7bd 100644
}
static void
-@@ -4564,16 +4518,15 @@ pinctrl_compose_ipv4(struct dp_packet *packet, struct eth_addr eth_src,
+@@ -4051,12 +4005,14 @@ prepare_ipv6_ras(const struct shash *local_active_ports_ras,
+ void
+ pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
+ {
++ ovs_mutex_lock(&pinctrl_mutex);
+ wait_put_mac_bindings(ovnsb_idl_txn);
+ wait_controller_event(ovnsb_idl_txn);
+ wait_put_vport_bindings(ovnsb_idl_txn);
+ int64_t new_seq = seq_read(pinctrl_main_seq);
+ seq_wait(pinctrl_main_seq, new_seq);
+ wait_put_fdbs(ovnsb_idl_txn);
++ ovs_mutex_unlock(&pinctrl_mutex);
+ }
+
+ /* Called by ovn-controller. */
+@@ -4564,16 +4520,15 @@ pinctrl_compose_ipv4(struct dp_packet *packet, struct eth_addr eth_src,
ovs_be32 ipv4_dst, uint8_t ip_proto, uint8_t ttl,
uint16_t ip_payload_len)
{
@@ -1336,7 +1863,7 @@ index f0667807e..2dd1bc7bd 100644
nh->ip_ihl_ver = IP_IHL_VER(5, 4);
nh->ip_tot_len = htons(sizeof *nh + ip_payload_len);
nh->ip_tos = IP_DSCP_CS6;
-@@ -4584,6 +4537,7 @@ pinctrl_compose_ipv4(struct dp_packet *packet, struct eth_addr eth_src,
+@@ -4584,6 +4539,7 @@ pinctrl_compose_ipv4(struct dp_packet *packet, struct eth_addr eth_src,
nh->ip_csum = 0;
nh->ip_csum = csum(nh, sizeof *nh);
@@ -1344,7 +1871,7 @@ index f0667807e..2dd1bc7bd 100644
}
static void
-@@ -4592,22 +4546,20 @@ pinctrl_compose_ipv6(struct dp_packet *packet, struct eth_addr eth_src,
+@@ -4592,22 +4548,20 @@ pinctrl_compose_ipv6(struct dp_packet *packet, struct eth_addr eth_src,
struct in6_addr *ipv6_dst, uint8_t ip_proto, uint8_t ttl,
uint16_t ip_payload_len)
{
@@ -1376,7 +1903,7 @@ index f0667807e..2dd1bc7bd 100644
packet_set_ipv6(packet, ipv6_src, ipv6_dst, 0, 0, ttl);
}
-@@ -5400,10 +5352,6 @@ ip_mcast_querier_send_igmp(struct rconn *swconn, struct ip_mcast_snoop *ip_ms)
+@@ -5400,10 +5354,6 @@ ip_mcast_querier_send_igmp(struct rconn *swconn, struct ip_mcast_snoop *ip_ms)
ip_ms->cfg.query_ipv4_dst,
IPPROTO_IGMP, 1, sizeof(struct igmpv3_query_header));
@@ -1387,7 +1914,7 @@ index f0667807e..2dd1bc7bd 100644
/* IGMP query max-response in tenths of seconds. */
uint8_t max_response = ip_ms->cfg.query_max_resp_s * 10;
uint8_t qqic = max_response;
-@@ -5449,15 +5397,10 @@ ip_mcast_querier_send_mld(struct rconn *swconn, struct ip_mcast_snoop *ip_ms)
+@@ -5449,15 +5399,10 @@ ip_mcast_querier_send_mld(struct rconn *swconn, struct ip_mcast_snoop *ip_ms)
IPPROTO_HOPOPTS, 1,
IPV6_EXT_HEADER_LEN + MLD_QUERY_HEADER_LEN);
@@ -1404,7 +1931,7 @@ index f0667807e..2dd1bc7bd 100644
/* MLD query max-response in milliseconds. */
uint16_t max_response = ip_ms->cfg.query_max_resp_s * 1000;
uint8_t qqic = ip_ms->cfg.query_max_resp_s;
-@@ -6033,6 +5976,8 @@ pinctrl_handle_put_nd_ra_opts(
+@@ -6033,6 +5978,8 @@ pinctrl_handle_put_nd_ra_opts(
struct dp_packet pkt_out;
dp_packet_init(&pkt_out, new_packet_size);
dp_packet_clear(&pkt_out);
@@ -1413,7 +1940,7 @@ index f0667807e..2dd1bc7bd 100644
dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
pkt_out_ptr = &pkt_out;
-@@ -6155,23 +6100,26 @@ wait_controller_event(struct ovsdb_idl_txn *ovnsb_idl_txn)
+@@ -6155,23 +6102,26 @@ wait_controller_event(struct ovsdb_idl_txn *ovnsb_idl_txn)
static bool
pinctrl_handle_empty_lb_backends_opts(struct ofpbuf *userdata)
{
@@ -1444,7 +1971,7 @@ index f0667807e..2dd1bc7bd 100644
case EMPTY_LB_VIP:
vip = xmemdup0(userdata_opt_data, size);
break;
-@@ -6820,8 +6768,6 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet,
+@@ -6820,8 +6770,6 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet,
{
int payload_len = sizeof(struct udp_header) + sizeof(struct bfd_msg);
@@ -1453,7 +1980,7 @@ index f0667807e..2dd1bc7bd 100644
if (IN6_IS_ADDR_V4MAPPED(&entry->ip_src)) {
ovs_be32 ip_src = in6_addr_get_mapped_ipv4(&entry->ip_src);
ovs_be32 ip_dst = in6_addr_get_mapped_ipv4(&entry->ip_dst);
-@@ -6833,13 +6779,13 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet,
+@@ -6833,13 +6781,13 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet,
MAXTTL, payload_len);
}
@@ -1469,7 +1996,7 @@ index f0667807e..2dd1bc7bd 100644
msg->vers_diag = (BFD_VERSION << 5);
msg->mult = entry->local_mult;
msg->length = BFD_PACKET_LEN;
-@@ -7383,7 +7329,7 @@ svc_monitor_send_tcp_health_check__(struct rconn *swconn,
+@@ -7383,7 +7331,7 @@ svc_monitor_send_tcp_health_check__(struct rconn *swconn,
ip4_src, in6_addr_get_mapped_ipv4(&svc_mon->ip),
IPPROTO_TCP, 63, TCP_HEADER_LEN);
@@ -1478,7 +2005,7 @@ index f0667807e..2dd1bc7bd 100644
dp_packet_set_l4(&packet, th);
th->tcp_dst = htons(svc_mon->proto_port);
th->tcp_src = tcp_src;
-@@ -7446,13 +7392,12 @@ svc_monitor_send_udp_health_check(struct rconn *swconn,
+@@ -7446,13 +7394,12 @@ svc_monitor_send_udp_health_check(struct rconn *swconn,
ip4_src, in6_addr_get_mapped_ipv4(&svc_mon->ip),
IPPROTO_UDP, 63, UDP_HEADER_LEN + 8);
@@ -1527,15 +2054,21 @@ index 55eb3c7b6..5d692f1d1 100644
UUID_ARGS(&uuid), start_conj_id, n_conjs,
ret ? "true" : "false");
diff --git a/debian/changelog b/debian/changelog
-index 0cc5f14ac..1a1c7364e 100644
+index 0cc5f14ac..abf5ab264 100644
--- a/debian/changelog
+++ b/debian/changelog
-@@ -1,3 +1,15 @@
+@@ -1,3 +1,21 @@
++OVN (21.12.3-1) unstable; urgency=low
++ [ OVN team ]
++ * New upstream version
++
++ -- OVN team Fri, 03 Jun 2022 11:53:58 -0400
++
+OVN (21.12.2-1) unstable; urgency=low
+ [ OVN team ]
+ * New upstream version
+
-+ -- OVN team Fri, 11 Mar 2022 13:22:24 -0500
++ -- OVN team Fri, 03 Jun 2022 11:53:58 -0400
+
+OVN (21.12.1-1) unstable; urgency=low
+ [ OVN team ]
@@ -1547,16 +2080,101 @@ index 0cc5f14ac..1a1c7364e 100644
* New upstream version
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
-index cdef5fb03..0641b927e 100644
+index cdef5fb03..547797584 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
-@@ -807,5 +807,6 @@ void ovnacts_encode(const struct ovnact[], size_t ovnacts_len,
+@@ -59,6 +59,8 @@ struct ovn_extend_table;
+ OVNACT(NEXT, ovnact_next) \
+ OVNACT(LOAD, ovnact_load) \
+ OVNACT(MOVE, ovnact_move) \
++ OVNACT(PUSH, ovnact_push_pop) \
++ OVNACT(POP, ovnact_push_pop) \
+ OVNACT(EXCHANGE, ovnact_move) \
+ OVNACT(DEC_TTL, ovnact_null) \
+ OVNACT(CT_NEXT, ovnact_ct_next) \
+@@ -69,6 +71,7 @@ struct ovn_extend_table;
+ OVNACT(CT_DNAT_IN_CZONE, ovnact_ct_nat) \
+ OVNACT(CT_SNAT_IN_CZONE, ovnact_ct_nat) \
+ OVNACT(CT_LB, ovnact_ct_lb) \
++ OVNACT(CT_LB_MARK, ovnact_ct_lb) \
+ OVNACT(SELECT, ovnact_select) \
+ OVNACT(CT_CLEAR, ovnact_null) \
+ OVNACT(CLONE, ovnact_nest) \
+@@ -233,6 +236,12 @@ struct ovnact_move {
+ struct expr_field rhs;
+ };
+
++/* OVNACT_PUSH, OVNACT_POP. */
++struct ovnact_push_pop {
++ struct ovnact ovnact;
++ struct expr_field field;
++};
++
+ /* OVNACT_CT_NEXT. */
+ struct ovnact_ct_next {
+ struct ovnact ovnact;
+@@ -273,7 +282,7 @@ struct ovnact_ct_lb_dst {
+ uint16_t port;
+ };
+
+-/* OVNACT_CT_LB. */
++/* OVNACT_CT_LB/OVNACT_CT_LB_MARK. */
+ struct ovnact_ct_lb {
+ struct ovnact ovnact;
+ struct ovnact_ct_lb_dst *dsts;
+@@ -799,6 +808,8 @@ struct ovnact_encode_params {
+ * 'lookup_fdb' to resubmit. */
+ uint32_t ctrl_meter_id; /* Meter to be used if the resulting flow
+ sends packets to controller. */
++ uint32_t common_nat_ct_zone; /* When performing NAT in a common CT zone,
++ this determines which CT zone to use */
+ };
+
+ void ovnacts_encode(const struct ovnact[], size_t ovnacts_len,
+@@ -807,5 +818,6 @@ void ovnacts_encode(const struct ovnact[], size_t ovnacts_len,
void ovnacts_free(struct ovnact[], size_t ovnacts_len);
char *ovnact_op_to_string(uint32_t);
+int encode_ra_dnssl_opt(char *data, char *buf, int buf_len);
#endif /* ovn/actions.h */
+diff --git a/include/ovn/features.h b/include/ovn/features.h
+index d12a8eb0d..8fbdbf19a 100644
+--- a/include/ovn/features.h
++++ b/include/ovn/features.h
+@@ -21,7 +21,8 @@
+ #include "smap.h"
+
+ /* ovn-controller supported feature names. */
+-#define OVN_FEATURE_PORT_UP_NOTIF "port-up-notif"
++#define OVN_FEATURE_PORT_UP_NOTIF "port-up-notif"
++#define OVN_FEATURE_CT_NO_MASKED_LABEL "ct-no-masked-label"
+
+ /* OVS datapath supported features. Based on availability OVN might generate
+ * different types of openflows.
+diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
+index 2118f7933..18516634e 100644
+--- a/include/ovn/logical-fields.h
++++ b/include/ovn/logical-fields.h
+@@ -36,8 +36,6 @@ enum ovn_controller_event {
+ * (32 bits). */
+ #define MFF_LOG_SNAT_ZONE MFF_REG12 /* conntrack snat zone for gateway router
+ * (32 bits). */
+-#define MFF_LOG_NAT_ZONE MFF_LOG_DNAT_ZONE /* conntrack zone for both snat
+- * and dnat. */
+ #define MFF_LOG_CT_ZONE MFF_REG13 /* Logical conntrack zone for lports
+ * (32 bits). */
+ #define MFF_LOG_INPORT MFF_REG14 /* Logical input port (32 bits). */
+@@ -176,6 +174,9 @@ const struct ovn_field *ovn_field_from_name(const char *name);
+ #define OVN_CT_BLOCKED 1
+ #define OVN_CT_NATTED 2
+
++#define OVN_CT_ECMP_ETH_1ST_BIT 32
++#define OVN_CT_ECMP_ETH_END_BIT 79
++
+ #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE)
+ #define OVN_CT_MASKED_STR(LABEL_VALUE) \
+ OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE)
diff --git a/lib/acl-log.c b/lib/acl-log.c
index 220b6dc30..9530dd763 100644
--- a/lib/acl-log.c
@@ -1598,7 +2216,7 @@ index 4f23f790d..da7fa2f02 100644
#endif /* lib/acl-log.h */
diff --git a/lib/actions.c b/lib/actions.c
-index da00ee349..c46772e78 100644
+index da00ee349..d5985b937 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -40,6 +40,7 @@
@@ -1609,7 +2227,196 @@ index da00ee349..c46772e78 100644
VLOG_DEFINE_THIS_MODULE(actions);
-@@ -1842,19 +1843,20 @@ encode_event_empty_lb_backends_opts(struct ofpbuf *ofpacts,
+@@ -570,6 +571,75 @@ ovnact_move_free(struct ovnact_move *move OVS_UNUSED)
+ {
+ }
+
++
++static void
++parse_push_pop(struct action_context *ctx, bool is_push)
++{
++ lexer_force_match(ctx->lexer, LEX_T_LPAREN);
++
++ struct expr_field f;
++ if (!expr_field_parse(ctx->lexer, ctx->pp->symtab, &f, &ctx->prereqs)) {
++ return;
++ }
++ size_t ofs = ctx->ovnacts->size;
++ char *error = expr_type_check(&f, f.n_bits, !is_push, ctx->scope);
++ if (error) {
++ ctx->ovnacts->size = ofs;
++ lexer_error(ctx->lexer, "%s", error);
++ free(error);
++ return;
++ }
++
++ lexer_force_match(ctx->lexer, LEX_T_RPAREN);
++
++ struct ovnact_push_pop *p;
++ if (is_push) {
++ p = ovnact_put_PUSH(ctx->ovnacts);
++ } else {
++ p = ovnact_put_POP(ctx->ovnacts);
++ }
++ p->field = f;
++}
++
++static void
++format_PUSH(const struct ovnact_push_pop *push, struct ds *s)
++{
++ ds_put_cstr(s, "push(");
++ expr_field_format(&push->field, s);
++ ds_put_cstr(s, ");");
++}
++
++static void
++encode_PUSH(const struct ovnact_push_pop *push,
++ const struct ovnact_encode_params *ep OVS_UNUSED,
++ struct ofpbuf *ofpacts)
++{
++ ofpact_put_STACK_PUSH(ofpacts)->subfield =
++ expr_resolve_field(&push->field);
++}
++
++static void
++format_POP(const struct ovnact_push_pop *pop, struct ds *s)
++{
++ ds_put_cstr(s, "pop(");
++ expr_field_format(&pop->field, s);
++ ds_put_cstr(s, ");");
++}
++
++static void
++encode_POP(const struct ovnact_push_pop *pop,
++ const struct ovnact_encode_params *ep OVS_UNUSED,
++ struct ofpbuf *ofpacts)
++{
++ ofpact_put_STACK_POP(ofpacts)->subfield =
++ expr_resolve_field(&pop->field);
++}
++
++static void
++ovnact_push_pop_free(struct ovnact_push_pop *push OVS_UNUSED)
++{
++}
++
+ static void
+ parse_DEC_TTL(struct action_context *ctx)
+ {
+@@ -1059,7 +1129,7 @@ encode_CT_DNAT_IN_CZONE(const struct ovnact_ct_nat *cn,
+ const struct ovnact_encode_params *ep,
+ struct ofpbuf *ofpacts)
+ {
+- encode_ct_nat(cn, ep, false, MFF_LOG_NAT_ZONE, ofpacts);
++ encode_ct_nat(cn, ep, false, ep->common_nat_ct_zone, ofpacts);
+ }
+
+ static void
+@@ -1067,7 +1137,7 @@ encode_CT_SNAT_IN_CZONE(const struct ovnact_ct_nat *cn,
+ const struct ovnact_encode_params *ep,
+ struct ofpbuf *ofpacts)
+ {
+- encode_ct_nat(cn, ep, true, MFF_LOG_NAT_ZONE, ofpacts);
++ encode_ct_nat(cn, ep, true, ep->common_nat_ct_zone, ofpacts);
+ }
+
+ static void
+@@ -1076,7 +1146,7 @@ ovnact_ct_nat_free(struct ovnact_ct_nat *ct_nat OVS_UNUSED)
+ }
+
+ static void
+-parse_ct_lb_action(struct action_context *ctx)
++parse_ct_lb_action(struct action_context *ctx, bool ct_lb_mark)
+ {
+ if (ctx->pp->cur_ltable >= ctx->pp->n_tables) {
+ lexer_error(ctx->lexer, "\"ct_lb\" action not allowed in last table.");
+@@ -1182,7 +1252,8 @@ parse_ct_lb_action(struct action_context *ctx)
+ }
+ }
+
+- struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts);
++ struct ovnact_ct_lb *cl = ct_lb_mark ? ovnact_put_CT_LB_MARK(ctx->ovnacts)
++ : ovnact_put_CT_LB(ctx->ovnacts);
+ cl->ltable = ctx->pp->cur_ltable + 1;
+ cl->dsts = dsts;
+ cl->n_dsts = n_dsts;
+@@ -1190,9 +1261,13 @@ parse_ct_lb_action(struct action_context *ctx)
+ }
+
+ static void
+-format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
++format_ct_lb(const struct ovnact_ct_lb *cl, struct ds *s, bool ct_lb_mark)
+ {
+- ds_put_cstr(s, "ct_lb");
++ if (ct_lb_mark) {
++ ds_put_cstr(s, "ct_lb_mark");
++ } else {
++ ds_put_cstr(s, "ct_lb");
++ }
+ if (cl->n_dsts) {
+ ds_put_cstr(s, "(backends=");
+ for (size_t i = 0; i < cl->n_dsts; i++) {
+@@ -1228,9 +1303,22 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
+ }
+
+ static void
+-encode_CT_LB(const struct ovnact_ct_lb *cl,
++format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
++{
++ format_ct_lb(cl, s, false);
++}
++
++static void
++format_CT_LB_MARK(const struct ovnact_ct_lb *cl, struct ds *s)
++{
++ format_ct_lb(cl, s, true);
++}
++
++static void
++encode_ct_lb(const struct ovnact_ct_lb *cl,
+ const struct ovnact_encode_params *ep,
+- struct ofpbuf *ofpacts)
++ struct ofpbuf *ofpacts,
++ bool ct_lb_mark)
+ {
+ uint8_t recirc_table = cl->ltable + first_ptable(ep, ep->pipeline);
+ if (!cl->n_dsts) {
+@@ -1299,8 +1387,9 @@ encode_CT_LB(const struct ovnact_ct_lb *cl,
+ ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15],"
+ "exec(set_field:"
+ OVN_CT_MASKED_STR(OVN_CT_NATTED)
+- "->ct_label))",
+- recirc_table, zone_reg);
++ "->%s))",
++ recirc_table, zone_reg,
++ ct_lb_mark ? "ct_mark" : "ct_label");
+ }
+
+ table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
+@@ -1315,6 +1404,22 @@ encode_CT_LB(const struct ovnact_ct_lb *cl,
+ og->group_id = table_id;
+ }
+
++static void
++encode_CT_LB(const struct ovnact_ct_lb *cl,
++ const struct ovnact_encode_params *ep,
++ struct ofpbuf *ofpacts)
++{
++ encode_ct_lb(cl, ep, ofpacts, false);
++}
++
++static void
++encode_CT_LB_MARK(const struct ovnact_ct_lb *cl,
++ const struct ovnact_encode_params *ep,
++ struct ofpbuf *ofpacts)
++{
++ encode_ct_lb(cl, ep, ofpacts, true);
++}
++
+ static void
+ ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb)
+ {
+@@ -1842,19 +1947,20 @@ encode_event_empty_lb_backends_opts(struct ofpbuf *ofpacts,
{
for (const struct ovnact_gen_option *o = event->options;
o < &event->options[event->n_options]; o++) {
@@ -1642,7 +2449,16 @@ index da00ee349..c46772e78 100644
}
}
-@@ -2987,6 +2989,15 @@ parse_put_nd_ra_opts(struct action_context *ctx, const struct expr_field *dst,
+@@ -2330,7 +2436,7 @@ validate_empty_lb_backends(struct action_context *ctx,
+
+ switch (o->option->code) {
+ case EMPTY_LB_VIP:
+- if (!inet_parse_active(c->string, 0, &ss, false)) {
++ if (!inet_parse_active(c->string, 0, &ss, false, NULL)) {
+ lexer_error(ctx->lexer, "Invalid load balancer VIP '%s'",
+ c->string);
+ return;
+@@ -2987,6 +3093,15 @@ parse_put_nd_ra_opts(struct action_context *ctx, const struct expr_field *dst,
case ND_OPT_MTU:
ok = c->format == LEX_F_DECIMAL;
break;
@@ -1658,7 +2474,7 @@ index da00ee349..c46772e78 100644
}
if (!ok) {
-@@ -3017,6 +3028,109 @@ format_PUT_ND_RA_OPTS(const struct ovnact_put_opts *po,
+@@ -3017,6 +3132,109 @@ format_PUT_ND_RA_OPTS(const struct ovnact_put_opts *po,
format_put_opts("put_nd_ra_opts", po, s);
}
@@ -1768,7 +2584,7 @@ index da00ee349..c46772e78 100644
static void
encode_put_nd_ra_option(const struct ovnact_gen_option *o,
struct ofpbuf *ofpacts, ptrdiff_t ra_offset)
-@@ -3091,6 +3205,46 @@ encode_put_nd_ra_option(const struct ovnact_gen_option *o,
+@@ -3091,6 +3309,46 @@ encode_put_nd_ra_option(const struct ovnact_gen_option *o,
sizeof(ovs_be32[4]));
break;
}
@@ -1815,6 +2631,146 @@ index da00ee349..c46772e78 100644
}
}
+@@ -4044,6 +4302,10 @@ parse_action(struct action_context *ctx)
+ parse_set_action(ctx);
+ } else if (lexer_match_id(ctx->lexer, "next")) {
+ parse_NEXT(ctx);
++ } else if (lexer_match_id(ctx->lexer, "push")) {
++ parse_push_pop(ctx, true);
++ } else if (lexer_match_id(ctx->lexer, "pop")) {
++ parse_push_pop(ctx, false);
+ } else if (lexer_match_id(ctx->lexer, "output")) {
+ ovnact_put_OUTPUT(ctx->ovnacts);
+ } else if (lexer_match_id(ctx->lexer, "ip.ttl")) {
+@@ -4061,7 +4323,9 @@ parse_action(struct action_context *ctx)
+ } else if (lexer_match_id(ctx->lexer, "ct_snat_in_czone")) {
+ parse_CT_SNAT_IN_CZONE(ctx);
+ } else if (lexer_match_id(ctx->lexer, "ct_lb")) {
+- parse_ct_lb_action(ctx);
++ parse_ct_lb_action(ctx, false);
++ } else if (lexer_match_id(ctx->lexer, "ct_lb_mark")) {
++ parse_ct_lb_action(ctx, true);
+ } else if (lexer_match_id(ctx->lexer, "ct_clear")) {
+ ovnact_put_CT_CLEAR(ctx->ovnacts);
+ } else if (lexer_match_id(ctx->lexer, "clone")) {
+diff --git a/lib/expr.c b/lib/expr.c
+index e3f6bb892..30ed25c6e 100644
+--- a/lib/expr.c
++++ b/lib/expr.c
+@@ -203,16 +203,17 @@ expr_combine(enum expr_type type, struct expr *a, struct expr *b)
+ }
+
+ static void
+-expr_insert_andor(struct expr *andor, struct expr *before, struct expr *new)
++expr_insert_andor(struct expr *andor, struct ovs_list *before,
++ struct expr *new)
+ {
+ if (new->type == andor->type) {
+ if (andor->type == EXPR_T_AND) {
+ /* Conjunction junction, what's your function? */
+ }
+- ovs_list_splice(&before->node, new->andor.next, &new->andor);
+- free(new);
++ ovs_list_splice(before, new->andor.next, &new->andor);
++ expr_destroy(new);
+ } else {
+- ovs_list_insert(&before->node, &new->node);
++ ovs_list_insert(before, &new->node);
+ }
+ }
+
+@@ -1955,7 +1956,8 @@ expr_annotate__(struct expr *expr, const struct shash *symtab,
+ expr_destroy(expr);
+ return NULL;
+ }
+- expr_insert_andor(expr, next, new_sub);
++ expr_insert_andor(expr, next ? &next->node : &expr->andor,
++ new_sub);
+ }
+ *errorp = NULL;
+ return expr;
+@@ -2155,7 +2157,7 @@ expr_evaluate_condition(struct expr *expr,
+ struct expr *e = expr_evaluate_condition(sub, is_chassis_resident,
+ c_aux);
+ e = expr_fix(e);
+- expr_insert_andor(expr, next, e);
++ expr_insert_andor(expr, next ? &next->node : &expr->andor, e);
+ }
+ return expr_fix(expr);
+
+@@ -2188,7 +2190,8 @@ expr_simplify(struct expr *expr)
+ case EXPR_T_OR:
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
+ ovs_list_remove(&sub->node);
+- expr_insert_andor(expr, next, expr_simplify(sub));
++ expr_insert_andor(expr, next ? &next->node : &expr->andor,
++ expr_simplify(sub));
+ }
+ return expr_fix(expr);
+
+@@ -2298,12 +2301,13 @@ crush_and_string(struct expr *expr, const struct expr_symbol *symbol)
+ * EXPR_T_OR with EXPR_T_CMP subexpressions. */
+ struct expr *sub, *next = NULL;
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
++ struct ovs_list *next_list = next ? &next->node : &expr->andor;
+ ovs_list_remove(&sub->node);
+ struct expr *new = crush_cmps(sub, symbol);
+ switch (new->type) {
+ case EXPR_T_CMP:
+ if (!singleton) {
+- ovs_list_insert(&next->node, &new->node);
++ ovs_list_insert(next_list, &new->node);
+ singleton = new;
+ } else {
+ bool match = !strcmp(new->cmp.string, singleton->cmp.string);
+@@ -2317,7 +2321,7 @@ crush_and_string(struct expr *expr, const struct expr_symbol *symbol)
+ case EXPR_T_AND:
+ OVS_NOT_REACHED();
+ case EXPR_T_OR:
+- ovs_list_insert(&next->node, &new->node);
++ ovs_list_insert(next_list, &new->node);
+ break;
+ case EXPR_T_BOOLEAN:
+ if (!new->boolean) {
+@@ -2413,7 +2417,7 @@ crush_and_numeric(struct expr *expr, const struct expr_symbol *symbol)
+ case EXPR_T_AND:
+ OVS_NOT_REACHED();
+ case EXPR_T_OR:
+- ovs_list_insert(&next->node, &new->node);
++ ovs_list_insert(next ? &next->node : &expr->andor, &new->node);
+ break;
+ case EXPR_T_BOOLEAN:
+ if (!new->boolean) {
+@@ -2579,7 +2583,8 @@ crush_or(struct expr *expr, const struct expr_symbol *symbol)
+ * is now a disjunction of cmps over the same symbol. */
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
+ ovs_list_remove(&sub->node);
+- expr_insert_andor(expr, next, crush_cmps(sub, symbol));
++ expr_insert_andor(expr, next ? &next->node : &expr->andor,
++ crush_cmps(sub, symbol));
+ }
+ expr = expr_fix(expr);
+ if (expr->type != EXPR_T_OR) {
+@@ -2737,8 +2742,7 @@ expr_normalize_and(struct expr *expr)
+
+ struct expr *a, *b;
+ LIST_FOR_EACH_SAFE (a, b, node, &expr->andor) {
+- if (&b->node == &expr->andor
+- || a->type != EXPR_T_CMP || b->type != EXPR_T_CMP
++ if (!b || a->type != EXPR_T_CMP || b->type != EXPR_T_CMP
+ || a->cmp.symbol != b->cmp.symbol) {
+ continue;
+ } else if (a->cmp.symbol->width
+@@ -2815,7 +2819,8 @@ expr_normalize_or(struct expr *expr)
+ }
+ free(new);
+ } else {
+- expr_insert_andor(expr, next, new);
++ expr_insert_andor(expr, next ? &next->node : &expr->andor,
++ new);
+ }
+ } else {
+ ovs_assert(sub->type == EXPR_T_CMP ||
diff --git a/lib/extend-table.c b/lib/extend-table.c
index c708e24b9..32d541b55 100644
--- a/lib/extend-table.c
@@ -1875,6 +2831,41 @@ index 2958a55e3..b098c5089 100644
if (!allowed) {
VLOG_DBG("node: %s, recompute (%s) aborted", node->name, reason);
+diff --git a/lib/logical-fields.c b/lib/logical-fields.c
+index 352a48c89..ed3ec62e1 100644
+--- a/lib/logical-fields.c
++++ b/lib/logical-fields.c
+@@ -133,6 +133,18 @@ ovn_init_symtab(struct shash *symtab)
+ /* Connection tracking state. */
+ expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false,
+ WR_CT_COMMIT);
++ expr_symtab_add_subfield_scoped(symtab, "ct_mark.blocked", NULL,
++ "ct_mark["
++ OVN_CT_STR(OVN_CT_BLOCKED_BIT)
++ "]",
++ WR_CT_COMMIT);
++ expr_symtab_add_subfield_scoped(symtab, "ct_mark.natted", NULL,
++ "ct_mark["
++ OVN_CT_STR(OVN_CT_NATTED_BIT)
++ "]",
++ WR_CT_COMMIT);
++ expr_symtab_add_subfield_scoped(symtab, "ct_mark.ecmp_reply_port", NULL,
++ "ct_mark[16..31]", WR_CT_COMMIT);
+
+ expr_symtab_add_field_scoped(symtab, "ct_label", MFF_CT_LABEL, NULL,
+ false, WR_CT_COMMIT);
+@@ -147,7 +159,10 @@ ovn_init_symtab(struct shash *symtab)
+ "]",
+ WR_CT_COMMIT);
+ expr_symtab_add_subfield_scoped(symtab, "ct_label.ecmp_reply_eth", NULL,
+- "ct_label[32..79]", WR_CT_COMMIT);
++ "ct_label["
++ OVN_CT_STR(OVN_CT_ECMP_ETH_1ST_BIT) ".."
++ OVN_CT_STR(OVN_CT_ECMP_ETH_END_BIT) "]",
++ WR_CT_COMMIT);
+ expr_symtab_add_subfield_scoped(symtab, "ct_label.ecmp_reply_port", NULL,
+ "ct_label[80..95]", WR_CT_COMMIT);
+ expr_symtab_add_subfield_scoped(symtab, "ct_label.label", NULL,
diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
index 9a33f5cda..49ecea81f 100644
--- a/lib/ovn-l7.h
@@ -1921,11 +2912,74 @@ index 56ceed8e8..7edc4c0b6 100644
nodes = ovs_numa_get_n_numas();
if (nodes == OVS_NUMA_UNSPEC || nodes <= 0) {
nodes = 1;
+diff --git a/lib/ovn-util.c b/lib/ovn-util.c
+index c3da413aa..ac63da5a2 100644
+--- a/lib/ovn-util.c
++++ b/lib/ovn-util.c
+@@ -747,7 +747,7 @@ ip_address_and_port_from_lb_key(const char *key, char **ip_address,
+ uint16_t *port, int *addr_family)
+ {
+ struct sockaddr_storage ss;
+- if (!inet_parse_active(key, 0, &ss, false)) {
++ if (!inet_parse_active(key, 0, &ss, false, NULL)) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
+ key);
+@@ -766,8 +766,11 @@ ip_address_and_port_from_lb_key(const char *key, char **ip_address,
+ }
+
+ /* Increment this for any logical flow changes, if an existing OVN action is
+- * modified or a stage is added to a logical pipeline. */
+-#define OVN_INTERNAL_MINOR_VER 3
++ * modified or a stage is added to a logical pipeline.
++ *
++ * This value is also used to handle some backward compatibility during
++ * upgrading. It should never decrease or rewind. */
++#define OVN_INTERNAL_MINOR_VER 4
+
+ /* Returns the OVN version. The caller must free the returned value. */
+ char *
+@@ -778,6 +781,24 @@ ovn_get_internal_version(void)
+ N_OVNACTS, OVN_INTERNAL_MINOR_VER);
+ }
+
++unsigned int
++ovn_parse_internal_version_minor(const char *ver)
++{
++ const char *p = ver + strlen(ver);
++ for (int i = 0; i < strlen(ver); i++) {
++ if (*p == '.') {
++ break;
++ }
++ p--;
++ }
++
++ unsigned int minor;
++ if (ovs_scan(p, ".%u", &minor)) {
++ return minor;
++ }
++ return 0;
++}
++
+ #ifdef DDLOG
+ /* Callbacks used by the ddlog northd code to print warnings and errors. */
+ void
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
-index a923c3b65..b212c64b7 100644
+index a923c3b65..a4f3187e3 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
-@@ -261,14 +261,16 @@ struct sctp_chunk_header {
+@@ -247,6 +247,10 @@ bool ip_address_and_port_from_lb_key(const char *key, char **ip_address,
+ * value. */
+ char *ovn_get_internal_version(void);
+
++/* Parse the provided internal version string and return the "minor" part which
++ * is expected to be an unsigned integer followed by the last "." in the
++ * string. Returns 0 if the string can't be parsed. */
++unsigned int ovn_parse_internal_version_minor(const char *ver);
+
+ /* OVN Packet definitions. These may eventually find a home in OVS's
+ * packets.h file. For the time being, they live here because OVN uses them
+@@ -261,14 +265,16 @@ struct sctp_chunk_header {
BUILD_ASSERT_DECL(SCTP_CHUNK_HEADER_LEN == sizeof(struct sctp_chunk_header));
#define SCTP_INIT_CHUNK_LEN 16
@@ -1947,8 +3001,20 @@ index a923c3b65..b212c64b7 100644
/* These are the only SCTP chunk types that OVN cares about.
* There is no need to define the other chunk types until they are
+diff --git a/northd/en-lflow.c b/northd/en-lflow.c
+index ffbdaf4e8..fa0dfcbe0 100644
+--- a/northd/en-lflow.c
++++ b/northd/en-lflow.c
+@@ -60,6 +60,7 @@ void en_lflow_run(struct engine_node *node, void *data OVS_UNUSED)
+ lflow_input.meter_groups = &northd_data->meter_groups;
+ lflow_input.lbs = &northd_data->lbs;
+ lflow_input.bfd_connections = &northd_data->bfd_connections;
++ lflow_input.features = &northd_data->features;
+ lflow_input.ovn_internal_version_changed =
+ northd_data->ovn_internal_version_changed;
+
diff --git a/northd/northd.c b/northd/northd.c
-index c714227b2..fbc29b554 100644
+index c714227b2..ac20ee849 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -112,18 +112,20 @@ enum ovn_stage {
@@ -1984,12 +3050,54 @@ index c714227b2..fbc29b554 100644
\
/* Logical switch egress stages. */ \
PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
-@@ -761,16 +763,6 @@ init_nat_entries(struct ovn_datapath *od)
- return;
- }
+@@ -236,6 +238,17 @@ enum ovn_stage {
+ /* Register used for setting a label for ACLs in a Logical Switch. */
+ #define REG_LABEL "reg3"
+
++/* Register used for temporarily store ECMP eth.src to avoid masked ct_label
++ * access. It doesn't really occupy registers because the content of the
++ * register is saved to stack and then restored in the same flow.
++ * Note: the bits must match ct_label.ecmp_reply_eth defined in
++ * logical-fields.c */
++#define REG_ECMP_ETH_FULL "xxreg1"
++#define REG_ECMP_ETH_FIELD REG_ECMP_ETH_FULL "[" \
++ OVN_CT_STR(OVN_CT_ECMP_ETH_1ST_BIT) \
++ ".." \
++ OVN_CT_STR(OVN_CT_ECMP_ETH_END_BIT) "]"
++
+ #define FLAGBIT_NOT_VXLAN "flags[1] == 0"
-- if (od->n_l3dgw_ports > 1) {
-- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ /*
+@@ -376,6 +389,23 @@ ovn_stage_to_datapath_type(enum ovn_stage stage)
+ }
+ }
+
++static void
++build_chassis_features(const struct northd_input *input_data,
++ struct chassis_features *chassis_features)
++{
++ const struct sbrec_chassis *chassis;
++
++ SBREC_CHASSIS_TABLE_FOR_EACH (chassis, input_data->sbrec_chassis) {
++ if (!smap_get_bool(&chassis->other_config,
++ OVN_FEATURE_CT_NO_MASKED_LABEL,
++ false)) {
++ chassis_features->ct_no_masked_label = false;
++ return;
++ }
++ }
++ chassis_features->ct_no_masked_label = true;
++}
++
+ struct ovn_chassis_qdisc_queues {
+ struct hmap_node key_node;
+ uint32_t queue_id;
+@@ -761,16 +791,6 @@ init_nat_entries(struct ovn_datapath *od)
+ return;
+ }
+
+- if (od->n_l3dgw_ports > 1) {
+- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
- VLOG_WARN_RL(&rl, "NAT is configured on logical router %s, which has %"
- PRIuSIZE" distributed gateway ports. NAT is not supported"
- " yet when there is more than one distributed gateway "
@@ -2001,7 +3109,7 @@ index c714227b2..fbc29b554 100644
od->nat_entries = xmalloc(od->nbr->n_nat * sizeof *od->nat_entries);
for (size_t i = 0; i < od->nbr->n_nat; i++) {
-@@ -1641,13 +1633,13 @@ destroy_routable_addresses(struct ovn_port_routable_addresses *ra)
+@@ -1641,13 +1661,13 @@ destroy_routable_addresses(struct ovn_port_routable_addresses *ra)
}
static char **get_nat_addresses(const struct ovn_port *op, size_t *n,
@@ -2017,7 +3125,117 @@ index c714227b2..fbc29b554 100644
if (!nats) {
return;
-@@ -2711,7 +2703,8 @@ join_logical_ports(struct northd_input *input_data,
+@@ -1792,6 +1812,38 @@ lsp_is_router(const struct nbrec_logical_switch_port *nbsp)
+ return !strcmp(nbsp->type, "router");
+ }
+
++static bool
++lsp_is_type_changed(const struct sbrec_port_binding *sb,
++ const struct nbrec_logical_switch_port *nbsp,
++ bool *is_old_container_lport)
++{
++ *is_old_container_lport = false;
++ if (!sb || !nbsp) {
++ return false;
++ }
++
++ if (!sb->type[0] && !nbsp->type[0]) {
++ /* Two "VIF's" interface make sure both have parent_port
++ * set or both have parent_port unset, otherwisre they are
++ * different ports type.
++ */
++ if ((!sb->parent_port && nbsp->parent_name) ||
++ (sb->parent_port && !nbsp->parent_name)) {
++ *is_old_container_lport = true;
++ return true;
++ } else {
++ return false;
++ }
++ }
++
++ /* Both lports are not "VIF's" it is safe to use strcmp. */
++ if (sb->type[0] && nbsp->type[0]) {
++ return strcmp(sb->type, nbsp->type);
++ }
++
++ return true;
++}
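For illustration only (not part of the patch): the same decision logic applied to plain strings, exercising the container-lport case the comment above describes (a VIF whose parent_port is set in the SB DB but cleared in the NB DB). The helper below is a hypothetical stand-in, not the patched function; an empty type string means "VIF" and a NULL parent means parent_port unset.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for lsp_is_type_changed(). */
    static bool
    type_changed(const char *sb_type, const char *sb_parent,
                 const char *nb_type, const char *nb_parent,
                 bool *is_old_container_lport)
    {
        *is_old_container_lport = false;

        if (!sb_type[0] && !nb_type[0]) {
            /* Both VIFs: a change in parent_port presence means the lport
             * switched between container and regular VIF. */
            if ((!sb_parent && nb_parent) || (sb_parent && !nb_parent)) {
                *is_old_container_lport = true;
                return true;
            }
            return false;
        }
        if (sb_type[0] && nb_type[0]) {
            return strcmp(sb_type, nb_type) != 0;
        }
        return true;   /* one side is a VIF, the other is not */
    }

    int
    main(void)
    {
        bool old_container;
        /* Container lport whose parent_port was cleared in the NB DB. */
        bool changed = type_changed("", "parent-lsp", "", NULL, &old_container);

        printf("changed=%d is_old_container_lport=%d\n", changed, old_container);
        return 0;
    }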
++
+ static bool
+ lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
+ {
+@@ -2472,22 +2524,56 @@ join_logical_ports(struct northd_input *input_data,
+ VLOG_WARN_RL(&rl, "duplicate logical port %s", nbsp->name);
+ continue;
+ } else if (op && (!op->sb || op->sb->datapath == od->sb)) {
+- ovn_port_set_nb(op, nbsp, NULL);
+- ovs_list_remove(&op->list);
+-
+- uint32_t queue_id = smap_get_int(&op->sb->options,
+- "qdisc_queue_id", 0);
+- if (queue_id && op->sb->chassis) {
+- add_chassis_queue(
+- chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
+- queue_id);
+- }
++ /*
++ * Handle cases where the lport type was explicitly changed
++ * in the NB DB. In such cases:
++ * 1. remove the current sbrec of the affected lport from
++ * the port_binding table.
++ *
++ * 2. create a new sbrec with the same logical_port as the
++ * deleted lport and add it to the nb_only list, which
++ * makes northd handle this lport as a newly created
++ * one and recompute everything that is needed for it.
++ *
++ * For now this only affects container lport type changes;
++ * it is needed for container lports to avoid port type
++ * conflicts in ovn-controller when the user clears the
++ * parent_port field of the container lport.
++ *
++ * This approach can be applied to all other lport type
++ * changes by removing the is_old_container_lport check.
++ */
++ bool is_old_container_lport = false;
++ if (op->sb && lsp_is_type_changed(op->sb, nbsp,
++ &is_old_container_lport)
++ && is_old_container_lport) {
++ ovs_list_remove(&op->list);
++ sbrec_port_binding_delete(op->sb);
++ ovn_port_destroy(ports, op);
++ op = ovn_port_create(ports, nbsp->name, nbsp,
++ NULL, NULL);
++ ovs_list_push_back(nb_only, &op->list);
++ } else {
++ ovn_port_set_nb(op, nbsp, NULL);
++ ovs_list_remove(&op->list);
++
++ uint32_t queue_id = smap_get_int(&op->sb->options,
++ "qdisc_queue_id", 0);
++ if (queue_id && op->sb->chassis) {
++ add_chassis_queue(
++ chassis_qdisc_queues,
++ &op->sb->chassis->header_.uuid,
++ queue_id);
++ }
+
+- ovs_list_push_back(both, &op->list);
++ ovs_list_push_back(both, &op->list);
+
+- /* This port exists due to a SB binding, but should
+- * not have been initialized fully. */
+- ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
++ /* This port exists due to a SB binding, but should
++ * not have been initialized fully. */
++ ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
++ }
+ } else {
+ op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
+ ovs_list_push_back(nb_only, &op->list);
+@@ -2711,7 +2797,8 @@ join_logical_ports(struct northd_input *input_data,
* The caller must free each of the n returned strings with free(),
* and must free the returned array when it is no longer needed. */
static char **
@@ -2027,7 +3245,7 @@ index c714227b2..fbc29b554 100644
{
size_t n_nats = 0;
struct eth_addr mac;
-@@ -2791,24 +2784,26 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only)
+@@ -2791,24 +2878,26 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only)
}
}
@@ -2072,7 +3290,7 @@ index c714227b2..fbc29b554 100644
}
}
-@@ -3376,7 +3371,10 @@ ovn_port_update_sbrec(struct northd_input *input_data,
+@@ -3376,7 +3465,10 @@ ovn_port_update_sbrec(struct northd_input *input_data,
if (nat_addresses && !strcmp(nat_addresses, "router")) {
if (op->peer && op->peer->od
&& (chassis || op->peer->od->n_l3dgw_ports)) {
@@ -2084,7 +3302,33 @@ index c714227b2..fbc29b554 100644
}
/* Only accept manual specification of ethernet address
* followed by IPv4 addresses on type "l3gateway" ports. */
-@@ -3803,6 +3801,7 @@ build_ovn_lbs(struct northd_input *input_data,
+@@ -3661,12 +3753,13 @@ static bool
+ build_lb_vip_actions(struct ovn_lb_vip *lb_vip,
+ struct ovn_northd_lb_vip *lb_vip_nb,
+ struct ds *action, char *selection_fields,
+- bool ls_dp)
++ bool ls_dp, bool ct_lb_mark)
+ {
++ const char *ct_lb_action = ct_lb_mark ? "ct_lb_mark" : "ct_lb";
+ bool skip_hash_fields = false, reject = false;
+
+ if (lb_vip_nb->lb_health_check) {
+- ds_put_cstr(action, "ct_lb(backends=");
++ ds_put_format(action, "%s(backends=", ct_lb_action);
+
+ size_t n_active_backends = 0;
+ for (size_t i = 0; i < lb_vip->n_backends; i++) {
+@@ -3699,7 +3792,8 @@ build_lb_vip_actions(struct ovn_lb_vip *lb_vip,
+ } else if (lb_vip->empty_backend_rej && !lb_vip->n_backends) {
+ reject = true;
+ } else {
+- ds_put_format(action, "ct_lb(backends=%s);", lb_vip_nb->backend_ips);
++ ds_put_format(action, "%s(backends=%s);", ct_lb_action,
++ lb_vip_nb->backend_ips);
+ }
+
+ if (reject) {
+@@ -3803,6 +3897,7 @@ build_ovn_lbs(struct northd_input *input_data,
}
/* Delete any stale SB load balancer rows. */
@@ -2092,7 +3336,7 @@ index c714227b2..fbc29b554 100644
const struct sbrec_load_balancer *sbrec_lb, *next;
SBREC_LOAD_BALANCER_TABLE_FOR_EACH_SAFE (sbrec_lb, next,
input_data->sbrec_load_balancer_table) {
-@@ -3813,13 +3812,22 @@ build_ovn_lbs(struct northd_input *input_data,
+@@ -3813,13 +3908,22 @@ build_ovn_lbs(struct northd_input *input_data,
continue;
}
@@ -2118,7 +3362,145 @@ index c714227b2..fbc29b554 100644
/* Create SB Load balancer records if not present and sync
* the SB load balancer columns. */
-@@ -6122,7 +6130,7 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5889,13 +5993,18 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
+ }
+
+ static void
+-build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
++build_pre_stateful(struct ovn_datapath *od,
++ const struct chassis_features *features,
++ struct hmap *lflows)
+ {
+ /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
+ * allowed by default. */
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
+
++ const char *ct_lb_action = features->ct_no_masked_label
++ ? "ct_lb_mark"
++ : "ct_lb";
+ const char *lb_protocols[] = {"tcp", "udp", "sctp"};
+ struct ds actions = DS_EMPTY_INITIALIZER;
+ struct ds match = DS_EMPTY_INITIALIZER;
+@@ -5906,8 +6015,8 @@ build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
+ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip4 && %s",
+ lb_protocols[i]);
+ ds_put_format(&actions, REG_ORIG_DIP_IPV4 " = ip4.dst; "
+- REG_ORIG_TP_DPORT " = %s.dst; ct_lb;",
+- lb_protocols[i]);
++ REG_ORIG_TP_DPORT " = %s.dst; %s;",
++ lb_protocols[i], ct_lb_action);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 120,
+ ds_cstr(&match), ds_cstr(&actions));
+
+@@ -5916,20 +6025,20 @@ build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
+ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip6 && %s",
+ lb_protocols[i]);
+ ds_put_format(&actions, REG_ORIG_DIP_IPV6 " = ip6.dst; "
+- REG_ORIG_TP_DPORT " = %s.dst; ct_lb;",
+- lb_protocols[i]);
++ REG_ORIG_TP_DPORT " = %s.dst; %s;",
++ lb_protocols[i], ct_lb_action);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 120,
+ ds_cstr(&match), ds_cstr(&actions));
+ }
+
+- ds_destroy(&actions);
+- ds_destroy(&match);
++ ds_clear(&actions);
++ ds_put_format(&actions, "%s;", ct_lb_action);
+
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 110,
+- REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
++ REGBIT_CONNTRACK_NAT" == 1", ds_cstr(&actions));
+
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 110,
+- REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
++ REGBIT_CONNTRACK_NAT" == 1", ds_cstr(&actions));
+
+ /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
+ * sent to conntrack for tracking and defragmentation. */
+@@ -5938,10 +6047,15 @@ build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
+
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
+ REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
++
++ ds_destroy(&actions);
++ ds_destroy(&match);
+ }
+
+ static void
+-build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
++build_acl_hints(struct ovn_datapath *od,
++ const struct chassis_features *features,
++ struct hmap *lflows)
+ {
+ /* This stage builds hints for the IN/OUT_ACL stage. Based on various
+ * combinations of ct flags packets may hit only a subset of the logical
+@@ -5963,6 +6077,7 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+
+ for (size_t i = 0; i < ARRAY_SIZE(stages); i++) {
+ enum ovn_stage stage = stages[i];
++ const char *match;
+
+ /* In any case, advance to the next stage. */
+ if (!od->has_acls && !od->has_lb_vip) {
+@@ -5992,8 +6107,10 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+ * REGBIT_ACL_HINT_ALLOW_NEW.
+ * - drop ACLs.
+ */
+- ovn_lflow_add(lflows, od, stage, 6,
+- "!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1",
++ match = features->ct_no_masked_label
++ ? "!ct.new && ct.est && !ct.rpl && ct_mark.blocked == 1"
++ : "!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1";
++ ovn_lflow_add(lflows, od, stage, 6, match,
+ REGBIT_ACL_HINT_ALLOW_NEW " = 1; "
+ REGBIT_ACL_HINT_DROP " = 1; "
+ "next;");
+@@ -6009,11 +6126,13 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+ * - allow ACLs in which case the traffic should be allowed so we set
+ * REGBIT_ACL_HINT_ALLOW.
+ * - drop ACLs in which case the traffic should be blocked and the
+- * connection must be committed with ct_label.blocked set so we set
++ * connection must be committed with ct_mark.blocked set so we set
+ * REGBIT_ACL_HINT_BLOCK.
+ */
+- ovn_lflow_add(lflows, od, stage, 4,
+- "!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0",
++ match = features->ct_no_masked_label
++ ? "!ct.new && ct.est && !ct.rpl && ct_mark.blocked == 0"
++ : "!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0";
++ ovn_lflow_add(lflows, od, stage, 4, match,
+ REGBIT_ACL_HINT_ALLOW " = 1; "
+ REGBIT_ACL_HINT_BLOCK " = 1; "
+ "next;");
+@@ -6024,15 +6143,21 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+ ovn_lflow_add(lflows, od, stage, 3, "!ct.est",
+ REGBIT_ACL_HINT_DROP " = 1; "
+ "next;");
+- ovn_lflow_add(lflows, od, stage, 2, "ct.est && ct_label.blocked == 1",
++ match = features->ct_no_masked_label
++ ? "ct.est && ct_mark.blocked == 1"
++ : "ct.est && ct_label.blocked == 1";
++ ovn_lflow_add(lflows, od, stage, 2, match,
+ REGBIT_ACL_HINT_DROP " = 1; "
+ "next;");
+
+ /* Established connections that were previously allowed might hit
+ * drop ACLs in which case the connection must be committed with
+- * ct_label.blocked set.
++ * ct_mark.blocked set.
+ */
+- ovn_lflow_add(lflows, od, stage, 1, "ct.est && ct_label.blocked == 0",
++ match = features->ct_no_masked_label
++ ? "ct.est && ct_mark.blocked == 0"
++ : "ct.est && ct_label.blocked == 0";
++ ovn_lflow_add(lflows, od, stage, 1, match,
+ REGBIT_ACL_HINT_BLOCK " = 1; "
+ "next;");
+ }
+@@ -6122,7 +6247,7 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
{
struct ds match = DS_EMPTY_INITIALIZER;
struct ds actions = DS_EMPTY_INITIALIZER;
@@ -2127,9 +3509,18 @@ index c714227b2..fbc29b554 100644
char *next_action =
xasprintf("next(pipeline=%s,table=%d);",
-@@ -6163,7 +6171,15 @@ consider_acl(struct hmap *lflows, struct ovn_datapath *od,
+@@ -6158,12 +6283,23 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
+
+ static void
+ consider_acl(struct hmap *lflows, struct ovn_datapath *od,
+- struct nbrec_acl *acl, bool has_stateful,
++ struct nbrec_acl *acl, bool has_stateful, bool ct_masked_mark,
+ const struct shash *meter_groups, struct ds *match,
struct ds *actions)
{
++ const char *ct_blocked_match = ct_masked_mark
++ ? "ct_mark.blocked"
++ : "ct_label.blocked";
bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
- enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
+ enum ovn_stage stage;
@@ -2144,7 +3535,40 @@ index c714227b2..fbc29b554 100644
if (!strcmp(acl->action, "allow-stateless")) {
ds_clear(actions);
-@@ -6342,6 +6358,72 @@ ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
+@@ -6197,7 +6333,7 @@ consider_acl(struct hmap *lflows, struct ovn_datapath *od,
+ * It's also possible that a known connection was marked for
+ * deletion after a policy was deleted, but the policy was
+ * re-added while that connection is still known. We catch
+- * that case here and un-set ct_label.blocked (which will be done
++ * that case here and un-set ct_mark.blocked (which will be done
+ * by ct_commit in the "stateful" stage) to indicate that the
+ * connection should be allowed to resume.
+ */
+@@ -6267,11 +6403,11 @@ consider_acl(struct hmap *lflows, struct ovn_datapath *od,
+ ds_cstr(match), ds_cstr(actions),
+ &acl->header_);
+ }
+- /* For an existing connection without ct_label set, we've
++ /* For an existing connection without ct_mark.blocked set, we've
+ * encountered a policy change. ACLs previously allowed
+ * this connection and we committed the connection tracking
+ * entry. Current policy says that we should drop this
+- * connection. First, we set bit 0 of ct_label to indicate
++ * connection. First, we set ct_mark.blocked to indicate
+ * that this connection is set for deletion. By not
+ * specifying "next;", we implicitly drop the packet after
+ * updating conntrack state. We would normally defer
+@@ -6281,7 +6417,8 @@ consider_acl(struct hmap *lflows, struct ovn_datapath *od,
+ ds_clear(match);
+ ds_clear(actions);
+ ds_put_cstr(match, REGBIT_ACL_HINT_BLOCK " == 1");
+- ds_put_cstr(actions, "ct_commit { ct_label.blocked = 1; }; ");
++ ds_put_format(actions, "ct_commit { %s = 1; }; ",
++ ct_blocked_match);
+ if (!strcmp(acl->action, "reject")) {
+ build_reject_acl_rules(od, lflows, stage, acl, match,
+ actions, &acl->header_, meter_groups);
+@@ -6342,6 +6479,72 @@ ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
}
}
@@ -2217,7 +3641,24 @@ index c714227b2..fbc29b554 100644
static void
build_port_group_lswitches(struct northd_input *input_data,
struct hmap *pgs,
-@@ -6405,6 +6487,8 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+@@ -6383,10 +6586,14 @@ build_port_group_lswitches(struct northd_input *input_data,
+ }
+
+ static void
+-build_acls(struct ovn_datapath *od, struct hmap *lflows,
+- const struct hmap *port_groups, const struct shash *meter_groups)
++build_acls(struct ovn_datapath *od, const struct chassis_features *features,
++ struct hmap *lflows, const struct hmap *port_groups,
++ const struct shash *meter_groups)
+ {
+ bool has_stateful = od->has_stateful_acl || od->has_lb_vip;
++ const char *ct_blocked_match = features->ct_no_masked_label
++ ? "ct_mark.blocked"
++ : "ct_label.blocked";
+ struct ds match = DS_EMPTY_INITIALIZER;
+ struct ds actions = DS_EMPTY_INITIALIZER;
+
+@@ -6405,6 +6612,8 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
}
@@ -2226,9 +3667,67 @@ index c714227b2..fbc29b554 100644
if (has_stateful) {
/* Ingress and Egress ACL Table (Priority 1).
*
-@@ -6463,7 +6547,8 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
- "ct.rpl && ct_label.blocked == 0",
- use_ct_inv_match ? " && !ct.inv" : "");
+@@ -6420,30 +6629,34 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+ * subsequent packets will hit the flow at priority 0 that just
+ * uses "next;"
+ *
+- * We also check for established connections that have ct_label.blocked
++ * We also check for established connections that have ct_mark.blocked
+ * set on them. That's a connection that was disallowed, but is
+ * now allowed by policy again since it hit this default-allow flow.
+- * We need to set ct_label.blocked=0 to let the connection continue,
++ * We need to set ct_mark.blocked=0 to let the connection continue,
+ * which will be done by ct_commit() in the "stateful" stage.
+ * Subsequent packets will hit the flow at priority 0 that just
+ * uses "next;". */
++ ds_clear(&match);
++ ds_put_format(&match, "ip && (!ct.est || (ct.est && %s == 1))",
++ ct_blocked_match);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
+- "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
+- REGBIT_CONNTRACK_COMMIT" = 1; next;");
++ ds_cstr(&match),
++ REGBIT_CONNTRACK_COMMIT" = 1; next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
+- "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
+- REGBIT_CONNTRACK_COMMIT" = 1; next;");
++ ds_cstr(&match),
++ REGBIT_CONNTRACK_COMMIT" = 1; next;");
+
+ /* Ingress and Egress ACL Table (Priority 65532).
+ *
+ * Always drop traffic that's in an invalid state. Also drop
+ * reply direction packets for connections that have been marked
+- * for deletion (bit 0 of ct_label is set).
++ * for deletion (ct_mark.blocked is set).
+ *
+ * This is enforced at a higher priority than ACLs can be defined. */
+ ds_clear(&match);
+- ds_put_format(&match, "%s(ct.est && ct.rpl && ct_label.blocked == 1)",
+- use_ct_inv_match ? "ct.inv || " : "");
++ ds_put_format(&match, "%s(ct.est && ct.rpl && %s == 1)",
++ use_ct_inv_match ? "ct.inv || " : "",
++ ct_blocked_match);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3,
+ ds_cstr(&match), "drop;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3,
+@@ -6453,24 +6666,26 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+ *
+ * Allow reply traffic that is part of an established
+ * conntrack entry that has not been marked for deletion
+- * (bit 0 of ct_label). We only match traffic in the
++ * (ct_mark.blocked). We only match traffic in the
+ * reply direction because we want traffic in the request
+ * direction to hit the currently defined policy from ACLs.
+ *
+ * This is enforced at a higher priority than ACLs can be defined. */
+ ds_clear(&match);
+ ds_put_format(&match, "ct.est && !ct.rel && !ct.new%s && "
+- "ct.rpl && ct_label.blocked == 0",
+- use_ct_inv_match ? " && !ct.inv" : "");
++ "ct.rpl && %s == 0",
++ use_ct_inv_match ? " && !ct.inv" : "",
++ ct_blocked_match);
ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3,
- ds_cstr(&match), "next;");
+ ds_cstr(&match), REGBIT_ACL_HINT_DROP" = 0; "
@@ -2236,7 +3735,56 @@ index c714227b2..fbc29b554 100644
ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3,
ds_cstr(&match), "next;");
-@@ -6670,6 +6755,10 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
+ /* Ingress and Egress ACL Table (Priority 65535).
+ *
+ * Allow traffic that is related to an existing conntrack entry that
+- * has not been marked for deletion (bit 0 of ct_label).
++ * has not been marked for deletion (ct_mark.blocked).
+ *
+ * This is enforced at a higher priority than ACLs can be defined.
+ *
+@@ -6479,9 +6694,9 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+ * related traffic such as an ICMP Port Unreachable through
+ * that's generated from a non-listening UDP port. */
+ ds_clear(&match);
+- ds_put_format(&match, "!ct.est && ct.rel && !ct.new%s && "
+- "ct_label.blocked == 0",
+- use_ct_inv_match ? " && !ct.inv" : "");
++ ds_put_format(&match, "!ct.est && ct.rel && !ct.new%s && %s == 0",
++ use_ct_inv_match ? " && !ct.inv" : "",
++ ct_blocked_match);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3,
+ ds_cstr(&match), "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3,
+@@ -6499,14 +6714,16 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+ /* Ingress or Egress ACL Table (Various priorities). */
+ for (size_t i = 0; i < od->nbs->n_acls; i++) {
+ struct nbrec_acl *acl = od->nbs->acls[i];
+- consider_acl(lflows, od, acl, has_stateful, meter_groups, &match,
+- &actions);
++ consider_acl(lflows, od, acl, has_stateful,
++ features->ct_no_masked_label,
++ meter_groups, &match, &actions);
+ }
+ struct ovn_port_group *pg;
+ HMAP_FOR_EACH (pg, key_node, port_groups) {
+ if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
+ for (size_t i = 0; i < pg->nb_pg->n_acls; i++) {
+ consider_acl(lflows, od, pg->nb_pg->acls[i], has_stateful,
++ features->ct_no_masked_label,
+ meter_groups, &match, &actions);
+ }
+ }
+@@ -6658,7 +6875,7 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) {
+ }
+
+ static void
+-build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
++build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb, bool ct_lb_mark,
+ struct ds *match, struct ds *action,
+ const struct shash *meter_groups)
+ {
+@@ -6670,6 +6887,10 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
ds_clear(action);
ds_clear(match);
@@ -2247,7 +3795,17 @@ index c714227b2..fbc29b554 100644
/* Store the original destination IP to be used when generating
* hairpin flows.
*/
-@@ -6716,8 +6805,8 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
+@@ -6704,7 +6925,8 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
+ /* New connections in Ingress table. */
+ const char *meter = NULL;
+ bool reject = build_lb_vip_actions(lb_vip, lb_vip_nb, action,
+- lb->selection_fields, true);
++ lb->selection_fields, true,
++ ct_lb_mark);
+
+ ds_put_format(match, "ct.new && %s.dst == %s", ip_match,
+ lb_vip->vip_str);
+@@ -6716,8 +6938,8 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
struct ovn_lflow *lflow_ref = NULL;
uint32_t hash = ovn_logical_flow_hash(
@@ -2258,27 +3816,90 @@ index c714227b2..fbc29b554 100644
ds_cstr(match), ds_cstr(action));
for (size_t j = 0; j < lb->n_nb_ls; j++) {
-@@ -6730,7 +6819,7 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
- continue;
+@@ -6726,23 +6948,32 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
+ if (reject) {
+ meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
+ meter_groups);
+- } else if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
+- continue;
}
- lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
+- lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
- S_SWITCH_IN_STATEFUL, priority,
-+ S_SWITCH_IN_LB, priority,
- ds_cstr(match), ds_cstr(action),
- NULL, meter, &lb->nlb->header_,
- OVS_SOURCE_LOCATOR, hash);
-@@ -6741,8 +6830,9 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
+- ds_cstr(match), ds_cstr(action),
+- NULL, meter, &lb->nlb->header_,
+- OVS_SOURCE_LOCATOR, hash);
++ if (meter || !ovn_dp_group_add_with_reference(lflow_ref, od)) {
++ struct ovn_lflow *lflow = ovn_lflow_add_at_with_hash(
++ lflows, od, S_SWITCH_IN_LB, priority,
++ ds_cstr(match), ds_cstr(action),
++ NULL, meter, &lb->nlb->header_,
++ OVS_SOURCE_LOCATOR, hash);
++ lflow_ref = meter ? NULL : lflow;
++ }
+ }
+ }
+ }
+
static void
- build_stateful(struct ovn_datapath *od, struct hmap *lflows)
+-build_stateful(struct ovn_datapath *od, struct hmap *lflows)
++build_stateful(struct ovn_datapath *od,
++ const struct chassis_features *features,
++ struct hmap *lflows)
{
- /* Ingress and Egress stateful Table (Priority 0): Packets are
++ const char *ct_block_action = features->ct_no_masked_label
++ ? "ct_mark.blocked"
++ : "ct_label.blocked";
++ struct ds actions = DS_EMPTY_INITIALIZER;
++
+ /* Ingress LB, Ingress and Egress stateful Table (Priority 0): Packets are
* allowed by default. */
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
-@@ -7006,7 +7096,7 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list)
+@@ -6752,29 +6983,33 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows)
+ * We always set ct_mark.blocked to 0 here as
+ * any packet that makes it this far is part of a connection we
+ * want to allow to continue. */
++ ds_put_format(&actions, "ct_commit { %s = 0; "
++ "ct_label.label = " REG_LABEL "; }; next;",
++ ct_block_action);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
+ REGBIT_CONNTRACK_COMMIT" == 1 && "
+ REGBIT_ACL_LABEL" == 1",
+- "ct_commit { ct_label.blocked = 0; "
+- "ct_label.label = " REG_LABEL "; }; next;");
++ ds_cstr(&actions));
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
+ REGBIT_CONNTRACK_COMMIT" == 1 && "
+ REGBIT_ACL_LABEL" == 1",
+- "ct_commit { ct_label.blocked = 0; "
+- "ct_label.label = " REG_LABEL "; }; next;");
++ ds_cstr(&actions));
+
+ /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
+- * committed to conntrack. We always set ct_label.blocked to 0 here as
++ * committed to conntrack. We always set ct_mark.blocked to 0 here as
+ * any packet that makes it this far is part of a connection we
+ * want to allow to continue. */
++ ds_clear(&actions);
++ ds_put_format(&actions, "ct_commit { %s = 0; }; next;", ct_block_action);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
+ REGBIT_CONNTRACK_COMMIT" == 1 && "
+ REGBIT_ACL_LABEL" == 0",
+- "ct_commit { ct_label.blocked = 0; }; next;");
++ ds_cstr(&actions));
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
+ REGBIT_CONNTRACK_COMMIT" == 1 && "
+ REGBIT_ACL_LABEL" == 0",
+- "ct_commit { ct_label.blocked = 0; }; next;");
++ ds_cstr(&actions));
++ ds_destroy(&actions);
+ }
+
+ static void
+@@ -7006,7 +7241,7 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list)
}
/*
@@ -2287,7 +3908,7 @@ index c714227b2..fbc29b554 100644
* switching domain.
*/
static void
-@@ -7119,7 +7209,7 @@ lrouter_port_ipv6_reachable(const struct ovn_port *op,
+@@ -7119,7 +7354,7 @@ lrouter_port_ipv6_reachable(const struct ovn_port *op,
}
/*
@@ -2296,7 +3917,7 @@ index c714227b2..fbc29b554 100644
* that own the addresses. Other ARP/ND packets are still flooded in the
* switching domain as regular broadcast.
*/
-@@ -7156,7 +7246,7 @@ build_lswitch_rport_arp_req_flow(const char *ips,
+@@ -7156,7 +7391,7 @@ build_lswitch_rport_arp_req_flow(const char *ips,
}
/*
@@ -2305,7 +3926,7 @@ index c714227b2..fbc29b554 100644
* that own the addresses.
* Priorities:
* - 80: self originated GARPs that need to follow regular processing.
-@@ -7484,7 +7574,7 @@ build_lswitch_flows(const struct hmap *datapaths,
+@@ -7484,7 +7719,7 @@ build_lswitch_flows(const struct hmap *datapaths,
struct ovn_datapath *od;
@@ -2314,7 +3935,31 @@ index c714227b2..fbc29b554 100644
HMAP_FOR_EACH (od, key_node, datapaths) {
if (!od->nbs) {
continue;
-@@ -7553,7 +7643,7 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od,
+@@ -7512,6 +7747,7 @@ build_lswitch_flows(const struct hmap *datapaths,
+ static void
+ build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
+ const struct hmap *port_groups,
++ const struct chassis_features *features,
+ struct hmap *lflows,
+ const struct shash *meter_groups)
+ {
+@@ -7520,11 +7756,11 @@ build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
+
+ build_pre_acls(od, port_groups, lflows);
+ build_pre_lb(od, lflows);
+- build_pre_stateful(od, lflows);
+- build_acl_hints(od, lflows);
+- build_acls(od, lflows, port_groups, meter_groups);
++ build_pre_stateful(od, features, lflows);
++ build_acl_hints(od, features, lflows);
++ build_acls(od, features, lflows, port_groups, meter_groups);
+ build_qos(od, lflows);
+- build_stateful(od, lflows);
++ build_stateful(od, features, lflows);
+ build_lb_hairpin(od, lflows);
+ }
+ }
+@@ -7553,7 +7789,7 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od,
}
}
@@ -2323,7 +3968,7 @@ index c714227b2..fbc29b554 100644
* and vtep ports. (priority 100); see ovn-northd.8.xml for the
* rationale. */
-@@ -7575,7 +7665,7 @@ build_lswitch_arp_nd_responder_skip_local(struct ovn_port *op,
+@@ -7575,7 +7811,7 @@ build_lswitch_arp_nd_responder_skip_local(struct ovn_port *op,
}
}
@@ -2332,7 +3977,7 @@ index c714227b2..fbc29b554 100644
* (priority 50). */
static void
build_lswitch_arp_nd_responder_known_ips(struct ovn_port *op,
-@@ -7835,7 +7925,7 @@ build_lswitch_arp_nd_responder_known_ips(struct ovn_port *op,
+@@ -7835,7 +8071,7 @@ build_lswitch_arp_nd_responder_known_ips(struct ovn_port *op,
}
}
@@ -2341,7 +3986,7 @@ index c714227b2..fbc29b554 100644
* (priority 0)*/
static void
build_lswitch_arp_nd_responder_default(struct ovn_datapath *od,
-@@ -7846,7 +7936,7 @@ build_lswitch_arp_nd_responder_default(struct ovn_datapath *od,
+@@ -7846,7 +8082,7 @@ build_lswitch_arp_nd_responder_default(struct ovn_datapath *od,
}
}
@@ -2350,7 +3995,7 @@ index c714227b2..fbc29b554 100644
* (priority 110)*/
static void
build_lswitch_arp_nd_service_monitor(struct ovn_northd_lb *lb,
-@@ -7894,7 +7984,7 @@ build_lswitch_arp_nd_service_monitor(struct ovn_northd_lb *lb,
+@@ -7894,7 +8130,7 @@ build_lswitch_arp_nd_service_monitor(struct ovn_northd_lb *lb,
}
@@ -2359,7 +4004,7 @@ index c714227b2..fbc29b554 100644
* priority 100 flows. */
static void
build_lswitch_dhcp_options_and_response(struct ovn_port *op,
-@@ -7946,11 +8036,11 @@ build_lswitch_dhcp_options_and_response(struct ovn_port *op,
+@@ -7946,11 +8182,11 @@ build_lswitch_dhcp_options_and_response(struct ovn_port *op,
}
}
@@ -2374,7 +4019,7 @@ index c714227b2..fbc29b554 100644
* (priority 0). */
static void
build_lswitch_dhcp_and_dns_defaults(struct ovn_datapath *od,
-@@ -7965,7 +8055,7 @@ build_lswitch_dhcp_and_dns_defaults(struct ovn_datapath *od,
+@@ -7965,7 +8201,7 @@ build_lswitch_dhcp_and_dns_defaults(struct ovn_datapath *od,
}
}
@@ -2383,7 +4028,7 @@ index c714227b2..fbc29b554 100644
* priority 100 flows.
*/
static void
-@@ -7993,7 +8083,7 @@ build_lswitch_dns_lookup_and_response(struct ovn_datapath *od,
+@@ -7993,7 +8229,7 @@ build_lswitch_dns_lookup_and_response(struct ovn_datapath *od,
}
}
@@ -2392,7 +4037,7 @@ index c714227b2..fbc29b554 100644
* external ports on chassis not binding those ports.
* This makes the router pipeline to be run only on the chassis
* binding the external ports. */
-@@ -8010,7 +8100,7 @@ build_lswitch_external_port(struct ovn_port *op,
+@@ -8010,7 +8246,7 @@ build_lswitch_external_port(struct ovn_port *op,
}
}
@@ -2401,7 +4046,7 @@ index c714227b2..fbc29b554 100644
* (priority 70 - 100). */
static void
build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od,
-@@ -8102,7 +8192,7 @@ build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od,
+@@ -8102,7 +8338,7 @@ build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od,
}
@@ -2410,7 +4055,7 @@ index c714227b2..fbc29b554 100644
* (priority 90). */
static void
build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group,
-@@ -8180,7 +8270,7 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group,
+@@ -8180,7 +8416,7 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group,
static struct ovs_mutex mcgroup_mutex = OVS_MUTEX_INITIALIZER;
@@ -2419,7 +4064,237 @@ index c714227b2..fbc29b554 100644
static void
build_lswitch_ip_unicast_lookup(struct ovn_port *op,
struct hmap *lflows,
-@@ -10675,6 +10765,12 @@ build_neigh_learning_flows_for_lrouter(
+@@ -9219,6 +9455,7 @@ find_static_route_outport(struct ovn_datapath *od, const struct hmap *ports,
+ static void
+ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+ struct ovn_datapath *od,
++ bool ct_masked_mark,
+ const char *port_ip,
+ struct ovn_port *out_port,
+ const struct parsed_route *route,
+@@ -9229,6 +9466,9 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+ struct ds actions = DS_EMPTY_INITIALIZER;
+ struct ds ecmp_reply = DS_EMPTY_INITIALIZER;
+ char *cidr = normalize_v46_prefix(&route->prefix, route->plen);
++ const char *ct_ecmp_reply_port_match = ct_masked_mark
++ ? "ct_mark.ecmp_reply_port"
++ : "ct_label.ecmp_reply_port";
+
+ /* If symmetric ECMP replies are enabled, then packets that arrive over
+ * an ECMP route need to go through conntrack.
+@@ -9257,8 +9497,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+ ds_put_cstr(&match, " && (ct.new && !ct.est)");
+
+ ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src;"
+- " ct_label.ecmp_reply_port = %" PRId64 ";}; next;",
+- out_port->sb->tunnel_key);
++ " %s = %" PRId64 ";}; next;",
++ ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
+ ds_cstr(&match), ds_cstr(&actions),
+ &st_route->header_);
+@@ -9266,8 +9506,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+ /* Bypass ECMP selection if we already have ct_label information
+ * for where to route the packet.
+ */
+- ds_put_format(&ecmp_reply, "ct.rpl && ct_label.ecmp_reply_port == %"
+- PRId64, out_port->sb->tunnel_key);
++ ds_put_format(&ecmp_reply, "ct.rpl && %s == %"PRId64,
++ ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
+ ds_clear(&match);
+ ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply),
+ ds_cstr(route_match));
+@@ -9286,7 +9526,18 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+ ds_cstr(&ecmp_reply), "next;",
+ &st_route->header_);
+
+- const char *action = "eth.dst = ct_label.ecmp_reply_eth; next;";
++ /* Use REG_ECMP_ETH_FULL to pass the eth field from ct_label to eth.dst to
++ * avoid masked access to ct_label. Otherwise it may prevent OVS flow
++ * HW offloading from working on some NICs, because masked access to ct_label is
++ * not supported on those NICs due to HW limitations.
++ *
++ * Use push/pop to save the value of the register before using it and
++ * restore it immediately afterwards, so that the use of the register is
++ * temporary and doesn't interfere with other stages. */
++ const char *action = "push(" REG_ECMP_ETH_FULL "); "
++ REG_ECMP_ETH_FULL " = ct_label;"
++ " eth.dst = " REG_ECMP_ETH_FIELD ";"
++ " pop(" REG_ECMP_ETH_FULL "); next;";
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_RESOLVE,
+ 200, ds_cstr(&ecmp_reply),
+ action, &st_route->header_);
+@@ -9298,7 +9549,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+
+ static void
+ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
+- const struct hmap *ports, struct ecmp_groups_node *eg)
++ bool ct_masked_mark, const struct hmap *ports,
++ struct ecmp_groups_node *eg)
+
+ {
+ bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix);
+@@ -9352,7 +9604,8 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
+ if (smap_get(&od->nbr->options, "chassis") &&
+ route_->ecmp_symmetric_reply && sset_add(&visited_ports,
+ out_port->key)) {
+- add_ecmp_symmetric_reply_flows(lflows, od, lrp_addr_s, out_port,
++ add_ecmp_symmetric_reply_flows(lflows, od, ct_masked_mark,
++ lrp_addr_s, out_port,
+ route_, &route_match);
+ }
+ ds_clear(&match);
+@@ -9547,8 +9800,10 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
+ struct ovn_northd_lb_vip *vips_nb,
+ struct hmap *lflows,
+ struct ds *match, struct ds *action,
+- const struct shash *meter_groups)
++ const struct shash *meter_groups,
++ bool ct_lb_mark)
+ {
++ const char *ct_natted = ct_lb_mark ? "ct_mark.natted" : "ct_label.natted";
+ char *skip_snat_new_action = NULL;
+ char *skip_snat_est_action = NULL;
+ char *new_match;
+@@ -9558,12 +9813,13 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
+ ds_clear(action);
+
+ bool reject = build_lb_vip_actions(lb_vip, vips_nb, action,
+- lb->selection_fields, false);
++ lb->selection_fields, false,
++ ct_lb_mark);
+
+ /* Higher priority rules are added for load-balancing in DNAT
+ * table. For every match (on a VIP[:port]), we add two flows.
+ * One flow is for specific matching on ct.new with an action
+- * of "ct_lb($targets);". The other flow is for ct.est with
++ * of "ct_lb_mark($targets);". The other flow is for ct.est with
+ * an action of "next;".
+ */
+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+@@ -9590,13 +9846,13 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
+ REG_ORIG_TP_DPORT_ROUTER" == %d",
+ ds_cstr(match), lb->proto, lb_vip->vip_port);
+ est_match = xasprintf("ct.est && %s && %s && "
+- REG_ORIG_TP_DPORT_ROUTER" == %d && "
+- "ct_label.natted == 1",
+- ds_cstr(match), lb->proto, lb_vip->vip_port);
++ REG_ORIG_TP_DPORT_ROUTER" == %d && %s == 1",
++ ds_cstr(match), lb->proto, lb_vip->vip_port,
++ ct_natted);
+ } else {
+ new_match = xasprintf("ct.new && %s", ds_cstr(match));
+- est_match = xasprintf("ct.est && %s && ct_label.natted == 1",
+- ds_cstr(match));
++ est_match = xasprintf("ct.est && %s && %s == 1",
++ ds_cstr(match), ct_natted);
+ }
+
+ const char *ip_match = NULL;
+@@ -9757,8 +10013,9 @@ next:
+
+ static void
+ build_lswitch_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
+- const struct shash *meter_groups, struct ds *match,
+- struct ds *action)
++ const struct shash *meter_groups,
++ const struct chassis_features *features,
++ struct ds *match, struct ds *action)
+ {
+ if (!lb->n_nb_ls) {
+ return;
+@@ -9794,7 +10051,8 @@ build_lswitch_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
+ * a higher priority rule for load balancing below also commits the
+ * connection, so it is okay if we do not hit the above match on
+ * REGBIT_CONNTRACK_COMMIT. */
+- build_lb_rules(lflows, lb, match, action, meter_groups);
++ build_lb_rules(lflows, lb, features->ct_no_masked_label,
++ match, action, meter_groups);
+ }
+
+ /* If there are any load balancing rules, we should send the packet to
+@@ -9864,8 +10122,9 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
+
+ static void
+ build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
+- const struct shash *meter_groups, struct ds *match,
+- struct ds *action)
++ const struct shash *meter_groups,
++ const struct chassis_features *features,
++ struct ds *match, struct ds *action)
+ {
+ if (!lb->n_nb_lr) {
+ return;
+@@ -9875,8 +10134,8 @@ build_lrouter_flows_for_lb(struct ovn_northd_lb *lb, struct hmap *lflows,
+ struct ovn_lb_vip *lb_vip = &lb->vips[i];
+
+ build_lrouter_nat_flows_for_lb(lb_vip, lb, &lb->vips_nb[i],
+- lflows, match, action,
+- meter_groups);
++ lflows, match, action, meter_groups,
++ features->ct_no_masked_label);
+
+ if (!build_empty_lb_event_flow(lb_vip, lb->nlb, match, action)) {
+ continue;
+@@ -10016,7 +10275,7 @@ static inline void
+ lrouter_nat_add_ext_ip_match(struct ovn_datapath *od,
+ struct hmap *lflows, struct ds *match,
+ const struct nbrec_nat *nat,
+- bool is_v6, bool is_src, ovs_be32 mask)
++ bool is_v6, bool is_src, int cidr_bits)
+ {
+ struct nbrec_address_set *allowed_ext_ips = nat->allowed_ext_ips;
+ struct nbrec_address_set *exempted_ext_ips = nat->exempted_ext_ips;
+@@ -10052,7 +10311,7 @@ lrouter_nat_add_ext_ip_match(struct ovn_datapath *od,
+ priority = 100 + 2;
+ } else {
+ /* S_ROUTER_OUT_SNAT uses priority (mask + 1 + 128 + 1) */
+- priority = count_1bits(ntohl(mask)) + 3;
++ priority = cidr_bits + 3;
+
+ if (!od->is_gw_router) {
+ priority += 128;
+@@ -10492,6 +10751,28 @@ build_adm_ctrl_flows_for_lrouter(
+ }
+ }
+
++static int
++build_gateway_get_l2_hdr_size(struct ovn_port *op)
++{
++ struct ovn_port *peer = op->peer;
++
++ if (peer && peer->od && peer->od->nbs) {
++ /* Check if vlans are enabled on a localnet port running the logical
++ * switch connected to this logical router.
++ */
++ for (size_t i = 0; i < peer->od->n_localnet_ports; i++) {
++ struct ovn_port *localnet_port = peer->od->localnet_ports[i];
++ const struct nbrec_logical_switch_port *nbsp = localnet_port->nbsp;
++
++ if (nbsp && nbsp->n_tag_request > 0) {
++ return VLAN_ETH_HEADER_LEN;
++ }
++ }
++ }
++
++ return ETH_HEADER_LEN;
++}
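For illustration only (not part of the patch): assuming the usual Ethernet header sizes, this is the threshold that the gateway_mtu check ends up passing to check_pkt_larger() once the L2 header size chosen above is added to the configured MTU. The helper name is hypothetical.

    #include <stdbool.h>
    #include <stdio.h>

    #define ETH_HEADER_LEN      14
    #define VLAN_ETH_HEADER_LEN 18

    /* Hypothetical helper: gw_mtu plus the applicable L2 header size. */
    static int
    check_pkt_larger_threshold(int gw_mtu, bool localnet_has_tag_request)
    {
        int l2_hdr_size = localnet_has_tag_request ? VLAN_ETH_HEADER_LEN
                                                   : ETH_HEADER_LEN;
        return gw_mtu + l2_hdr_size;
    }

    int
    main(void)
    {
        /* With a 1500-byte MTU: 1514 on an untagged localnet, 1518 when the
         * connected localnet port carries a VLAN tag request. */
        printf("%d %d\n",
               check_pkt_larger_threshold(1500, false),
               check_pkt_larger_threshold(1500, true));
        return 0;
    }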
++
+ /* All 'gateway_mtu' and 'gateway_mtu_bypass' flows should be built with this
+ * function.
+ */
+@@ -10509,8 +10790,9 @@ build_gateway_mtu_flow(struct hmap *lflows, struct ovn_port *op,
+
+ ds_clear(actions);
+ if (gw_mtu > 0) {
++ int l2_hdr_size = build_gateway_get_l2_hdr_size(op);
+ ds_put_format(actions, REGBIT_PKT_LARGER" = check_pkt_larger(%d); ",
+- gw_mtu + VLAN_ETH_HEADER_LEN);
++ gw_mtu + l2_hdr_size);
+ }
+
+ ds_put_format_valist(actions, extra_actions_fmt, extra_actions_args);
+@@ -10675,6 +10957,12 @@ build_neigh_learning_flows_for_lrouter(
copp_meter_get(COPP_ARP, od->nbr->copp,
meter_groups));
@@ -2432,7 +4307,7 @@ index c714227b2..fbc29b554 100644
ovn_lflow_metered(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
"nd_na", "put_nd(inport, nd.target, nd.tll); next;",
copp_meter_get(COPP_ND_NA, od->nbr->copp,
-@@ -10767,34 +10863,6 @@ build_ND_RA_flows_for_lrouter_port(
+@@ -10767,34 +11055,6 @@ build_ND_RA_flows_for_lrouter_port(
return;
}
@@ -2467,7 +4342,7 @@ index c714227b2..fbc29b554 100644
const char *address_mode = smap_get(
&op->nbrp->ipv6_ra_configs, "address_mode");
-@@ -10810,11 +10878,6 @@ build_ND_RA_flows_for_lrouter_port(
+@@ -10810,11 +11070,6 @@ build_ND_RA_flows_for_lrouter_port(
return;
}
@@ -2479,7 +4354,7 @@ index c714227b2..fbc29b554 100644
ds_clear(match);
ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
op->json_key);
-@@ -10839,6 +10902,22 @@ build_ND_RA_flows_for_lrouter_port(
+@@ -10839,6 +11094,22 @@ build_ND_RA_flows_for_lrouter_port(
ds_put_format(actions, ", router_preference = \"%s\"", prf);
}
@@ -2502,7 +4377,174 @@ index c714227b2..fbc29b554 100644
bool add_rs_response_flow = false;
for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-@@ -13055,6 +13134,18 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
+@@ -10972,8 +11243,9 @@ build_ip_routing_flows_for_lrouter_port(
+
+ static void
+ build_static_route_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- const struct hmap *ports, const struct hmap *bfd_connections)
++ struct ovn_datapath *od, const struct chassis_features *features,
++ struct hmap *lflows, const struct hmap *ports,
++ const struct hmap *bfd_connections)
+ {
+ if (od->nbr) {
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
+@@ -11016,7 +11288,8 @@ build_static_route_flows_for_lrouter(
+ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
+ /* add a flow in IP_ROUTING, and one flow for each member in
+ * IP_ROUTING_ECMP. */
+- build_ecmp_route_flow(lflows, od, ports, group);
++ build_ecmp_route_flow(lflows, od, features->ct_no_masked_label,
++ ports, group);
+ }
+ const struct unique_routes_node *ur;
+ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
+@@ -11696,6 +11969,7 @@ build_gateway_redirect_flows_for_lrouter(
+ }
+ for (size_t i = 0; i < od->n_l3dgw_ports; i++) {
+ const struct ovsdb_idl_row *stage_hint = NULL;
++ bool add_def_flow = true;
+
+ if (od->l3dgw_ports[i]->nbrp) {
+ stage_hint = &od->l3dgw_ports[i]->nbrp->header_;
+@@ -11714,7 +11988,44 @@ build_gateway_redirect_flows_for_lrouter(
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
+ ds_cstr(match), ds_cstr(actions),
+ stage_hint);
++ for (int j = 0; j < od->n_nat_entries; j++) {
++ const struct ovn_nat *nat = &od->nat_entries[j];
++
++ if (!lrouter_nat_is_stateless(nat->nb) ||
++ strcmp(nat->nb->type, "dnat_and_snat") ||
++ (!nat->nb->allowed_ext_ips && !nat->nb->exempted_ext_ips)) {
++ continue;
++ }
++
++ struct ds match_ext = DS_EMPTY_INITIALIZER;
++ struct nbrec_address_set *as = nat->nb->allowed_ext_ips
++ ? nat->nb->allowed_ext_ips : nat->nb->exempted_ext_ips;
++ ds_put_format(&match_ext, "%s && ip%s.src == $%s",
++ ds_cstr(match), nat_entry_is_v6(nat) ? "6" : "4",
++ as->name);
++
++ if (nat->nb->allowed_ext_ips) {
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
++ 75, ds_cstr(&match_ext),
++ ds_cstr(actions), stage_hint);
++ if (add_def_flow) {
++ ds_clear(&match_ext);
++ ds_put_format(&match_ext, "ip && ip%s.dst == %s",
++ nat_entry_is_v6(nat) ? "6" : "4",
++ nat->nb->external_ip);
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 70,
++ ds_cstr(&match_ext), "drop;");
++ add_def_flow = false;
++ }
++ } else if (nat->nb->exempted_ext_ips) {
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
++ 75, ds_cstr(&match_ext), "drop;",
++ stage_hint);
++ }
++ ds_destroy(&match_ext);
++ }
+ }
++
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
+ }
+@@ -12453,8 +12764,7 @@ build_lrouter_in_unsnat_flow(struct hmap *lflows, struct ovn_datapath *od,
+ ds_put_format(match, "ip && ip%s.dst == %s",
+ is_v6 ? "6" : "4", nat->external_ip);
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
++ ds_put_format(actions, "next;");
+ } else {
+ ds_put_cstr(actions, "ct_snat;");
+ }
+@@ -12479,8 +12789,7 @@ build_lrouter_in_unsnat_flow(struct hmap *lflows, struct ovn_datapath *od,
+ }
+
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
++ ds_put_format(actions, "next;");
+ } else {
+ ds_put_cstr(actions, "ct_snat_in_czone;");
+ }
+@@ -12514,7 +12823,7 @@ static void
+ build_lrouter_in_dnat_flow(struct hmap *lflows, struct ovn_datapath *od,
+ const struct nbrec_nat *nat, struct ds *match,
+ struct ds *actions, bool distributed,
+- ovs_be32 mask, bool is_v6)
++ int cidr_bits, bool is_v6)
+ {
+ /* Ingress DNAT table: Packets enter the pipeline with destination
+ * IP address that needs to be DNATted from a external IP address
+@@ -12532,7 +12841,7 @@ build_lrouter_in_dnat_flow(struct hmap *lflows, struct ovn_datapath *od,
+ ds_clear(actions);
+ if (nat->allowed_ext_ips || nat->exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+- is_v6, true, mask);
++ is_v6, true, cidr_bits);
+ }
+
+ if (!lport_addresses_is_empty(&od->dnat_force_snat_addrs)) {
+@@ -12576,7 +12885,7 @@ build_lrouter_in_dnat_flow(struct hmap *lflows, struct ovn_datapath *od,
+ ds_clear(actions);
+ if (nat->allowed_ext_ips || nat->exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+- is_v6, true, mask);
++ is_v6, true, cidr_bits);
+ }
+
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+@@ -12633,8 +12942,7 @@ build_lrouter_out_undnat_flow(struct hmap *lflows, struct ovn_datapath *od,
+
+ if (!strcmp(nat->type, "dnat_and_snat") &&
+ lrouter_nat_is_stateless(nat)) {
+- ds_put_format(actions, "ip%s.src=%s; next;",
+- is_v6 ? "6" : "4", nat->external_ip);
++ ds_put_format(actions, "next;");
+ } else {
+ ds_put_format(actions,
+ od->is_gw_router ? "ct_dnat;" : "ct_dnat_in_czone;");
+@@ -12679,8 +12987,7 @@ static void
+ build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od,
+ const struct nbrec_nat *nat, struct ds *match,
+ struct ds *actions, bool distributed,
+- struct eth_addr mac, ovs_be32 mask,
+- int cidr_bits, bool is_v6)
++ struct eth_addr mac, int cidr_bits, bool is_v6)
+ {
+ /* Egress SNAT table: Packets enter the egress pipeline with
+ * source ip address that needs to be SNATted to a external ip
+@@ -12698,13 +13005,14 @@ build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od,
+
+ if (nat->allowed_ext_ips || nat->exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+- is_v6, false, mask);
++ is_v6, false, cidr_bits);
+ }
+
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.src=%s; next;",
+ is_v6 ? "6" : "4", nat->external_ip);
+ } else {
++ ds_put_format(match, " && (!ct.trk || !ct.rpl)");
+ ds_put_format(actions, "ct_snat(%s", nat->external_ip);
+
+ if (nat->external_port_range[0]) {
+@@ -12746,7 +13054,7 @@ build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od,
+
+ if (nat->allowed_ext_ips || nat->exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+- is_v6, false, mask);
++ is_v6, false, cidr_bits);
+ }
+
+ if (distributed) {
+@@ -13055,6 +13363,18 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
return;
}
@@ -2521,7 +4563,158 @@ index c714227b2..fbc29b554 100644
struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
bool dnat_force_snat_ip =
-@@ -14948,6 +15039,7 @@ ovnnb_db_run(struct northd_input *input_data,
+@@ -13079,7 +13399,7 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
+ is_v6);
+ /* S_ROUTER_IN_DNAT */
+ build_lrouter_in_dnat_flow(lflows, od, nat, match, actions, distributed,
+- mask, is_v6);
++ cidr_bits, is_v6);
+
+ /* ARP resolve for NAT IPs. */
+ if (od->is_gw_router) {
+@@ -13118,7 +13438,7 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
+ mac, is_v6);
+ /* S_ROUTER_OUT_SNAT */
+ build_lrouter_out_snat_flow(lflows, od, nat, match, actions, distributed,
+- mac, mask, cidr_bits, is_v6);
++ mac, cidr_bits, is_v6);
+
+ /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */
+ build_lrouter_ingress_flow(lflows, od, nat, match, actions,
+@@ -13241,6 +13561,7 @@ struct lswitch_flow_build_info {
+ const struct shash *meter_groups;
+ const struct hmap *lbs;
+ const struct hmap *bfd_connections;
++ const struct chassis_features *features;
+ char *svc_check_match;
+ struct ds match;
+ struct ds actions;
+@@ -13259,7 +13580,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+ struct lswitch_flow_build_info *lsi)
+ {
+ /* Build Logical Switch Flows. */
+- build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups, lsi->lflows,
++ build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups,
++ lsi->features,
++ lsi->lflows,
+ lsi->meter_groups);
+
+ build_fwd_group_lflows(od, lsi->lflows);
+@@ -13279,7 +13602,8 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+ &lsi->actions, lsi->meter_groups);
+ build_ND_RA_flows_for_lrouter(od, lsi->lflows);
+ build_ip_routing_pre_flows_for_lrouter(od, lsi->lflows);
+- build_static_route_flows_for_lrouter(od, lsi->lflows, lsi->ports,
++ build_static_route_flows_for_lrouter(od, lsi->features,
++ lsi->lflows, lsi->ports,
+ lsi->bfd_connections);
+ build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
+ &lsi->actions);
+@@ -13409,10 +13733,12 @@ build_lflows_thread(void *arg)
+ build_lrouter_defrag_flows_for_lb(lb, lsi->lflows,
+ &lsi->match);
+ build_lrouter_flows_for_lb(lb, lsi->lflows,
+- lsi->meter_groups, &lsi->match,
+- &lsi->actions);
++ lsi->meter_groups,
++ lsi->features,
++ &lsi->match, &lsi->actions);
+ build_lswitch_flows_for_lb(lb, lsi->lflows,
+ lsi->meter_groups,
++ lsi->features,
+ &lsi->match, &lsi->actions);
+ }
+ }
+@@ -13504,7 +13830,8 @@ build_lswitch_and_lrouter_flows(const struct hmap *datapaths,
+ struct hmap *igmp_groups,
+ const struct shash *meter_groups,
+ const struct hmap *lbs,
+- const struct hmap *bfd_connections)
++ const struct hmap *bfd_connections,
++ const struct chassis_features *features)
+ {
+
+ char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac);
+@@ -13548,6 +13875,7 @@ build_lswitch_and_lrouter_flows(const struct hmap *datapaths,
+ lsiv[index].meter_groups = meter_groups;
+ lsiv[index].lbs = lbs;
+ lsiv[index].bfd_connections = bfd_connections;
++ lsiv[index].features = features;
+ lsiv[index].svc_check_match = svc_check_match;
+ lsiv[index].thread_lflow_counter = 0;
+ ds_init(&lsiv[index].match);
+@@ -13586,6 +13914,7 @@ build_lswitch_and_lrouter_flows(const struct hmap *datapaths,
+ .meter_groups = meter_groups,
+ .lbs = lbs,
+ .bfd_connections = bfd_connections,
++ .features = features,
+ .svc_check_match = svc_check_match,
+ .match = DS_EMPTY_INITIALIZER,
+ .actions = DS_EMPTY_INITIALIZER,
+@@ -13611,9 +13940,9 @@ build_lswitch_and_lrouter_flows(const struct hmap *datapaths,
+ &lsi.match);
+ build_lrouter_defrag_flows_for_lb(lb, lsi.lflows, &lsi.match);
+ build_lrouter_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
+- &lsi.match, &lsi.actions);
++ lsi.features, &lsi.match, &lsi.actions);
+ build_lswitch_flows_for_lb(lb, lsi.lflows, lsi.meter_groups,
+- &lsi.match, &lsi.actions);
++ lsi.features, &lsi.match, &lsi.actions);
+ }
+ stopwatch_stop(LFLOWS_LBS_STOPWATCH_NAME, time_msec());
+ stopwatch_start(LFLOWS_IGMP_STOPWATCH_NAME, time_msec());
+@@ -13745,7 +14074,8 @@ void build_lflows(struct lflow_input *input_data,
+ input_data->port_groups, &lflows,
+ &mcast_groups, &igmp_groups,
+ input_data->meter_groups, input_data->lbs,
+- input_data->bfd_connections);
++ input_data->bfd_connections,
++ input_data->features);
+
+ /* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+@@ -14801,6 +15131,7 @@ northd_init(struct northd_data *data)
+ hmap_init(&data->lbs);
+ hmap_init(&data->bfd_connections);
+ ovs_list_init(&data->lr_list);
++ memset(&data->features, 0, sizeof data->features);
+ data->ovn_internal_version_changed = false;
+ }
+
+@@ -14861,15 +15192,6 @@ ovnnb_db_run(struct northd_input *input_data,
+ if (!nb) {
+ nb = nbrec_nb_global_insert(ovnnb_txn);
+ }
+- const struct sbrec_sb_global *sb = sbrec_sb_global_table_first(
+- input_data->sbrec_sb_global_table);
+- if (!sb) {
+- sb = sbrec_sb_global_insert(ovnsb_txn);
+- }
+- if (nb->ipsec != sb->ipsec) {
+- sbrec_sb_global_set_ipsec(sb, nb->ipsec);
+- }
+- sbrec_sb_global_set_options(sb, &nb->options);
+
+ const char *mac_addr_prefix = set_mac_prefix(smap_get(&nb->options,
+ "mac_prefix"));
+@@ -14915,8 +15237,6 @@ ovnnb_db_run(struct northd_input *input_data,
+ nbrec_nb_global_set_options(nb, &options);
+ }
+
+- smap_destroy(&options);
+-
+ use_parallel_build =
+ (smap_get_bool(&nb->options, "use_parallel_build", false) &&
+ can_parallelize_hashes(false));
+@@ -14932,6 +15252,7 @@ ovnnb_db_run(struct northd_input *input_data,
+ check_lsp_is_up = !smap_get_bool(&nb->options,
+ "ignore_lsp_down", true);
+
++ build_chassis_features(input_data, &data->features);
+ build_datapaths(input_data, ovnsb_txn, &data->datapaths, &data->lr_list);
+ build_ovn_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
+ build_lrouter_lbs(&data->datapaths, &data->lbs);
+@@ -14948,6 +15269,7 @@ ovnnb_db_run(struct northd_input *input_data,
build_meter_groups(input_data, &data->meter_groups);
stopwatch_stop(BUILD_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
@@ -2529,10 +4722,86 @@ index c714227b2..fbc29b554 100644
ovn_update_ipv6_prefix(&data->ports);
sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
+@@ -14956,6 +15278,26 @@ ovnnb_db_run(struct northd_input *input_data,
+ sync_dns_entries(input_data, ovnsb_txn, &data->datapaths);
+ cleanup_stale_fdp_entries(input_data, &data->datapaths);
+ stopwatch_stop(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
++
++ /* Set up SB_Global (depends on chassis features). */
++ const struct sbrec_sb_global *sb = sbrec_sb_global_table_first(
++ input_data->sbrec_sb_global_table);
++ if (!sb) {
++ sb = sbrec_sb_global_insert(ovnsb_txn);
++ }
++ if (nb->ipsec != sb->ipsec) {
++ sbrec_sb_global_set_ipsec(sb, nb->ipsec);
++ }
++
++ /* Inform ovn-controllers whether LB flows will use ct_mark
++ * (i.e., only if all chassis support it).
++ */
++ smap_replace(&options, "lb_hairpin_use_ct_mark",
++ data->features.ct_no_masked_label ? "true" : "false");
++ if (!smap_equal(&sb->options, &options)) {
++ sbrec_sb_global_set_options(sb, &options);
++ }
++ smap_destroy(&options);
+ }
+
+ /* Stores the list of chassis which references an ha_chassis_group.
+diff --git a/northd/northd.h b/northd/northd.h
+index ebcb40de7..0d9c7b802 100644
+--- a/northd/northd.h
++++ b/northd/northd.h
+@@ -53,6 +53,10 @@ struct northd_input {
+ struct ovsdb_idl_index *sbrec_ip_mcast_by_dp;
+ };
+
++struct chassis_features {
++ bool ct_no_masked_label;
++};
++
+ struct northd_data {
+ /* Global state for 'en-northd'. */
+ struct hmap datapaths;
+@@ -63,6 +67,7 @@ struct northd_data {
+ struct hmap bfd_connections;
+ struct ovs_list lr_list;
+ bool ovn_internal_version_changed;
++ struct chassis_features features;
+ };
+
+ struct lflow_input {
+@@ -84,6 +89,7 @@ struct lflow_input {
+ const struct shash *meter_groups;
+ const struct hmap *lbs;
+ const struct hmap *bfd_connections;
++ const struct chassis_features *features;
+ bool ovn_internal_version_changed;
+ };
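For illustration only (not part of the patch): a sketch of how a chassis_features flag like the one declared above is meant to be consumed by the flow-building code, selecting the ct_lb_mark/ct_mark variants only when every chassis supports unmasked ct_mark matching. The struct and variable names are hypothetical mirrors of the real ones.

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical mirror of the chassis_features struct declared above. */
    struct chassis_features_sketch {
        bool ct_no_masked_label;
    };

    int
    main(void)
    {
        struct chassis_features_sketch features = { .ct_no_masked_label = true };

        const char *ct_lb_action = features.ct_no_masked_label ? "ct_lb_mark"
                                                               : "ct_lb";
        const char *ct_blocked  = features.ct_no_masked_label ? "ct_mark.blocked"
                                                              : "ct_label.blocked";

        printf("lb action: %s, blocked match: %s\n", ct_lb_action, ct_blocked);
        return 0;
    }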
+
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
-index 79f35bc16..ad79a32d2 100644
+index 79f35bc16..f2cee69d2 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
+@@ -550,7 +550,7 @@
+
+ -
+ Priority-120 flows that send the packets to connection tracker using
+- ct_lb; as the action so that the already established
++ ct_lb_mark; as the action so that the already established
+ traffic destined to the load balancer VIP gets DNATted based on a hint
+ provided by the previous tables (with a match
+ for reg0[2] == 1 and on supported load balancer protocols
+@@ -565,7 +565,7 @@
+ A priority-110 flow sends the packets to connection tracker based
+ on a hint provided by the previous tables
+ (with a match for reg0[2] == 1) by using the
+- ct_lb; action. This flow is added to handle
++ ct_lb_mark; action. This flow is added to handle
+ the traffic for load balancer VIPs whose protocol is not defined
+ (mainly for ICMP traffic).
+
@@ -663,15 +663,16 @@
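
The two documentation hunks above describe the flow-generation switch itself: once every chassis supports ct_mark, the load-balancer flows use ct_lb_mark rather than ct_lb. As an illustration only (the helper name and the backend list are made up, not code from this patch), the action string could be selected like this:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "openvswitch/dynamic-string.h"

    /* Illustrative helper: build the LB action for one VIP, picking
     * ct_lb_mark only when the aggregated chassis feature allows it. */
    static char *
    build_lb_action(const char *backends, bool ct_no_masked_label)
    {
        struct ds action = DS_EMPTY_INITIALIZER;

        ds_put_format(&action, "%s(%s);",
                      ct_no_masked_label ? "ct_lb_mark" : "ct_lb", backends);
        return ds_steal_cstr(&action);
    }

    int
    main(void)
    {
        char *action = build_lb_action("10.0.0.10:80,10.0.0.11:80", true);
        puts(action);   /* Prints: ct_lb_mark(10.0.0.10:80,10.0.0.11:80); */
        free(action);
        return 0;
    }
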
@@ -2555,17 +4824,42 @@ index 79f35bc16..ad79a32d2 100644
-
-@@ -746,7 +747,9 @@
+@@ -740,20 +741,22 @@
+ A priority-65532 flow that allows any traffic in the reply
+ direction for a connection that has been committed to the
+ connection tracker (i.e., established flows), as long as
+- the committed flow does not have ct_label.blocked set.
++ the committed flow does not have ct_mark.blocked set.
+ We only handle traffic in the reply direction here because
+ we want all packets going in the request direction to still
go through the flows that implement the currently defined
policy based on ACLs. If a connection is no longer allowed by
- policy, ct_label.blocked will get set and packets in the
+- policy, ct_label.blocked will get set and packets in the
- reply direction will no longer be allowed, either.
++ policy, ct_mark.blocked will get set and packets in the
+ reply direction will no longer be allowed, either. This flow also
+ clears the register bits reg0[9] and
+ reg0[10].
-
+ A priority-65532 flow that allows any traffic that is considered
+ related to a committed flow in the connection tracker (e.g., an
+ ICMP Port Unreachable from a non-listening UDP port), as long
+- as the committed flow does not have ct_label.blocked set.
++ as the committed flow does not have ct_mark.blocked set.
+
+
+ -
+@@ -763,7 +766,7 @@
+
+
-
+ A priority-65532 flow that drops all traffic in the reply direction
+- with ct_label.blocked set meaning that the connection
++ with ct_mark.blocked set meaning that the connection
+ should no longer be allowed due to a policy change. Packets
+ in the request direction are skipped here to let a newly created
+ ACL re-allow this connection.
@@ -838,7 +841,7 @@
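
For context, the flow documented above couples the ct_mark.blocked (formerly ct_label.blocked) check with an action that clears the two ACL hint register bits before letting the reply through. A hypothetical, self-contained rendering of that match/action pair follows; it mirrors the documented behaviour, not the literal strings northd generates:

    #include <stdbool.h>
    #include <stdio.h>

    /* Print the priority-65532 "allow established replies" flow described
     * above, switching the blocked bit between ct_mark and ct_label. */
    static void
    print_acl_reply_flow(bool ct_no_masked_label)
    {
        const char *blocked = ct_no_masked_label ? "ct_mark.blocked"
                                                 : "ct_label.blocked";

        printf("priority=65532 "
               "match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl "
               "&& %s == 0) "
               "action=(reg0[9] = 0; reg0[10] = 0; next;)\n", blocked);
    }

    int
    main(void)
    {
        print_acl_reply_flow(true);
        return 0;
    }
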
@@ -2575,6 +4869,24 @@ index 79f35bc16..ad79a32d2 100644
-
+@@ -851,7 +854,7 @@
+ P.dst == PORT. For IPv6 VIPs,
+ the flow matches ct.new && ip && ip6.dst ==
+ VIP && P && P.dst ==
- PORT. The flow's action is ct_lb(args)
++ PORT. The flow's action is ct_lb_mark(args), where args contains comma separated IP addresses
+ (and optional port numbers) to load balance to. The address family of
+ the IP addresses of args is the same as the address family
+@@ -872,7 +875,7 @@
+ ip4.dst == VIP. For IPv6 VIPs,
+ the flow matches ct.new && ip && ip6.dst ==
+ VIP. The action on this flow is
+- ct_lb(args), where args contains comma
++ ct_lb_mark(args), where args contains comma
+ separated IP addresses of the same address family as VIP.
+ For IPv4 traffic the flow also loads the original destination
+ IP and transport port in registers reg1 and
@@ -889,7 +892,73 @@
Please note using --reject option will disable
empty_lb SB controller event for this load balancer.
@@ -2582,7 +4894,7 @@ index 79f35bc16..ad79a32d2 100644
+
+
+ Ingress table 13: from-lport ACLs after LB
-+
+
+
+ Logical flows in this table closely reproduce those in the
+ ACL table in the OVN_Northbound database
@@ -2592,7 +4904,7 @@ index 79f35bc16..ad79a32d2 100644
+ limited range and have 1000 added to them to leave room for OVN default
+ flows at both higher and lower priorities.
+
-
++
+