diff --git a/.ovn.metadata b/.ovn.metadata index 1e1f41f..b668ec7 100644 --- a/.ovn.metadata +++ b/.ovn.metadata @@ -1,5 +1,5 @@ 002450621b33c5690060345b0aac25bc2426d675 SOURCES/docutils-0.12.tar.gz -6d9f9a2a605a1310ee9b27a9374db308fc742814 SOURCES/openvswitch-fe55ce3.tar.gz +05213fb15c10f668ae2f6b07be8afef5449e4c59 SOURCES/openvswitch-f19448b.tar.gz 2ddef5605ed74dd6c2331789129d27946508afee SOURCES/ovn-24.03.1.tar.gz d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz 6beb30f18ffac3de7689b7fd63e9a8a7d9c8df3a SOURCES/Sphinx-1.1.3.tar.gz diff --git a/SOURCES/ovn24.03.patch b/SOURCES/ovn24.03.patch index fbef749..90c0e40 100644 --- a/SOURCES/ovn24.03.patch +++ b/SOURCES/ovn24.03.patch @@ -17,6 +17,30 @@ index 0cb981775..456ab5c69 100644 - name: load image run: | sudo podman load -i /tmp/image.tar +diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst +index 26d3f8d4d..988257975 100644 +--- a/Documentation/internals/release-process.rst ++++ b/Documentation/internals/release-process.rst +@@ -203,5 +203,5 @@ Contact + + Use dev@openvswitch.org to discuss the OVN development and release process. + +-__ https://www.ovn.org/en/releases/long_term_support/ ++__ https://www.ovn.org/en/releases/#long-term-support + __ https://www.ovn.org +diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst +index ab6209482..6efb3a701 100644 +--- a/Documentation/intro/install/general.rst ++++ b/Documentation/intro/install/general.rst +@@ -428,7 +428,7 @@ the first time after you create the databases with ovsdb-tool, though running + it at any time is harmless:: + + $ ovn-nbctl --no-wait init +- $ ovn-sbctl --no-wait init ++ $ ovn-sbctl init + + Start ``ovn-northd``, telling it to connect to the OVN db servers same + Unix domain socket:: diff --git a/NEWS b/NEWS index e91cbe43f..ca3562425 100644 --- a/NEWS @@ -41,6 +65,786 @@ index 5f15422f2..962422bd2 100644 AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_HEADERS([config.h]) +diff --git a/controller/binding.c b/controller/binding.c +index 2afc5d48a..8ac2ce3e2 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -1709,7 +1709,10 @@ consider_container_lport(const struct sbrec_port_binding *pb, + } + + ovs_assert(parent_b_lport && parent_b_lport->pb); +- bool can_bind = lport_can_bind_on_this_chassis(b_ctx_in->chassis_rec, pb); ++ /* cannot bind to this chassis if the parent_port cannot be bounded. 
*/ ++ bool can_bind = lport_can_bind_on_this_chassis(b_ctx_in->chassis_rec, ++ parent_b_lport->pb) && ++ lport_can_bind_on_this_chassis(b_ctx_in->chassis_rec, pb); + + return consider_vif_lport_(pb, can_bind, b_ctx_in, b_ctx_out, + container_b_lport); +diff --git a/controller/chassis.c b/controller/chassis.c +index ad75df288..9bb2eba95 100644 +--- a/controller/chassis.c ++++ b/controller/chassis.c +@@ -371,6 +371,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg *ovs_cfg, + smap_replace(config, OVN_FEATURE_FDB_TIMESTAMP, "true"); + smap_replace(config, OVN_FEATURE_LS_DPG_COLUMN, "true"); + smap_replace(config, OVN_FEATURE_CT_COMMIT_NAT_V2, "true"); ++ smap_replace(config, OVN_FEATURE_CT_COMMIT_TO_ZONE, "true"); + } + + /* +@@ -516,6 +517,12 @@ chassis_other_config_changed(const struct ovs_chassis_cfg *ovs_cfg, + return true; + } + ++ if (!smap_get_bool(&chassis_rec->other_config, ++ OVN_FEATURE_CT_COMMIT_TO_ZONE, ++ false)) { ++ return true; ++ } ++ + return false; + } + +@@ -648,6 +655,7 @@ update_supported_sset(struct sset *supported) + sset_add(supported, OVN_FEATURE_FDB_TIMESTAMP); + sset_add(supported, OVN_FEATURE_LS_DPG_COLUMN); + sset_add(supported, OVN_FEATURE_CT_COMMIT_NAT_V2); ++ sset_add(supported, OVN_FEATURE_CT_COMMIT_TO_ZONE); + } + + static void +diff --git a/controller/lflow.c b/controller/lflow.c +index 895d17d19..760ec0b41 100644 +--- a/controller/lflow.c ++++ b/controller/lflow.c +@@ -874,6 +874,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, + .collector_ids = l_ctx_in->collector_ids, + .lflow_uuid = lflow->header_.uuid, + .dp_key = ldp->datapath->tunnel_key, ++ .explicit_arp_ns_output = l_ctx_in->explicit_arp_ns_output, + + .pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS, + .ingress_ptable = OFTABLE_LOG_INGRESS_PIPELINE, +diff --git a/controller/lflow.h b/controller/lflow.h +index 9b7ffa19c..295d004f4 100644 +--- a/controller/lflow.h ++++ b/controller/lflow.h +@@ -130,6 +130,7 @@ struct lflow_ctx_in { + bool lb_hairpin_use_ct_mark; + bool localnet_learn_fdb; + bool localnet_learn_fdb_changed; ++ bool explicit_arp_ns_output; + }; + + struct lflow_ctx_out { +diff --git a/controller/mac-learn.c b/controller/mac-learn.c +index 071f01b4f..0c3b60c23 100644 +--- a/controller/mac-learn.c ++++ b/controller/mac-learn.c +@@ -199,15 +199,24 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct eth_addr mac, + /* packet buffering functions */ + + struct packet_data * +-ovn_packet_data_create(struct ofpbuf ofpacts, +- const struct dp_packet *original_packet) ++ovn_packet_data_create(const struct ofputil_packet_in *pin, ++ const struct ofpbuf *continuation) + { + struct packet_data *pd = xmalloc(sizeof *pd); + +- pd->ofpacts = ofpacts; +- /* clone the packet to send it later with correct L2 address */ +- pd->p = dp_packet_clone_data(dp_packet_data(original_packet), +- dp_packet_size(original_packet)); ++ pd->pin = (struct ofputil_packet_in) { ++ .packet = xmemdup(pin->packet, pin->packet_len), ++ .packet_len = pin->packet_len, ++ .flow_metadata = pin->flow_metadata, ++ .reason = pin->reason, ++ .table_id = pin->table_id, ++ .cookie = pin->cookie, ++ /* Userdata are empty on purpose, ++ * it is not needed for the continuation. 
*/ ++ .userdata = NULL, ++ .userdata_len = 0, ++ }; ++ pd->continuation = ofpbuf_clone(continuation); + + return pd; + } +@@ -216,8 +225,8 @@ ovn_packet_data_create(struct ofpbuf ofpacts, + void + ovn_packet_data_destroy(struct packet_data *pd) + { +- dp_packet_delete(pd->p); +- ofpbuf_uninit(&pd->ofpacts); ++ free(pd->pin.packet); ++ ofpbuf_delete(pd->continuation); + free(pd); + } + +@@ -307,7 +316,10 @@ ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx, + + struct packet_data *pd; + LIST_FOR_EACH_POP (pd, node, &bp->queue) { +- struct eth_header *eth = dp_packet_data(pd->p); ++ struct dp_packet packet; ++ dp_packet_use_const(&packet, pd->pin.packet, pd->pin.packet_len); ++ ++ struct eth_header *eth = dp_packet_data(&packet); + eth->eth_dst = mac; + + ovs_list_push_back(&ctx->ready_packets_data, &pd->node); +diff --git a/controller/mac-learn.h b/controller/mac-learn.h +index e0fd6a8d1..20a015e1a 100644 +--- a/controller/mac-learn.h ++++ b/controller/mac-learn.h +@@ -24,6 +24,7 @@ + #include "openvswitch/hmap.h" + #include "openvswitch/list.h" + #include "openvswitch/ofpbuf.h" ++#include "openvswitch/ofp-packet.h" + + struct ovsdb_idl_index; + +@@ -91,8 +92,8 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs, + struct packet_data { + struct ovs_list node; + +- struct ofpbuf ofpacts; +- struct dp_packet *p; ++ struct ofpbuf *continuation; ++ struct ofputil_packet_in pin; + }; + + struct buffered_packets { +@@ -120,8 +121,8 @@ struct buffered_packets_ctx { + }; + + struct packet_data * +-ovn_packet_data_create(struct ofpbuf ofpacts, +- const struct dp_packet *original_packet); ++ovn_packet_data_create(const struct ofputil_packet_in *pin, ++ const struct ofpbuf *continuation); + void ovn_packet_data_destroy(struct packet_data *pd); + struct buffered_packets * + ovn_buffered_packets_add(struct buffered_packets_ctx *ctx, uint64_t dp_key, +diff --git a/controller/ofctrl.c b/controller/ofctrl.c +index f14cd79a8..6a2564604 100644 +--- a/controller/ofctrl.c ++++ b/controller/ofctrl.c +@@ -634,7 +634,6 @@ run_S_WAIT_BEFORE_CLEAR(void) + if (!wait_before_clear_time || + (wait_before_clear_expire && + time_msec() >= wait_before_clear_expire)) { +- wait_before_clear_expire = 0; + state = S_CLEAR_FLOWS; + return; + } +@@ -787,7 +786,7 @@ ofctrl_run(const struct ovsrec_bridge *br_int, + + rconn_run(swconn); + +- if (!rconn_is_connected(swconn)) { ++ if (!rconn_is_connected(swconn) || !pending_ct_zones) { + return reconnected; + } + +@@ -1112,6 +1111,12 @@ sb_to_flow_size(const struct sb_to_flow *stf) + return sizeof *stf; + } + ++static size_t ++sb_addrset_ref_size(const struct sb_addrset_ref *sar) ++{ ++ return sizeof *sar + strlen(sar->name) + 1; ++} ++ + static struct sb_to_flow * + sb_to_flow_find(struct hmap *uuid_flow_table, const struct uuid *sb_uuid) + { +@@ -1181,8 +1186,8 @@ link_flow_to_sb(struct ovn_desired_flow_table *flow_table, + } + if (!found) { + sar = xmalloc(sizeof *sar); +- mem_stats.sb_flow_ref_usage += sizeof *sar; + sar->name = xstrdup(as_info->name); ++ mem_stats.sb_flow_ref_usage += sb_addrset_ref_size(sar); + hmap_init(&sar->as_ip_to_flow_map); + ovs_list_insert(&stf->addrsets, &sar->list_node); + } +@@ -1568,7 +1573,7 @@ remove_flows_from_sb_to_flow(struct ovn_desired_flow_table *flow_table, + free(itfn); + } + hmap_destroy(&sar->as_ip_to_flow_map); +- mem_stats.sb_flow_ref_usage -= (sizeof *sar + strlen(sar->name) + 1); ++ mem_stats.sb_flow_ref_usage -= sb_addrset_ref_size(sar); + free(sar->name); + free(sar); + } +diff --git a/controller/ovn-controller.c 
b/controller/ovn-controller.c +index 1c9960c70..a40712e53 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -3667,6 +3667,7 @@ non_vif_data_ovs_iface_handler(struct engine_node *node, void *data OVS_UNUSED) + + struct ed_type_northd_options { + bool lb_hairpin_use_ct_mark; ++ bool explicit_arp_ns_output; + }; + + +@@ -3697,6 +3698,13 @@ en_northd_options_run(struct engine_node *node, void *data) + ? smap_get_bool(&sb_global->options, "lb_hairpin_use_ct_mark", + DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK) + : DEFAULT_SB_GLOBAL_LB_HAIRPIN_USE_CT_MARK; ++ ++ n_opts->explicit_arp_ns_output = ++ sb_global ++ ? smap_get_bool(&sb_global->options, "arp_ns_explicit_output", ++ false) ++ : false; ++ + engine_set_node_state(node, EN_UPDATED); + } + +@@ -3719,6 +3727,18 @@ en_northd_options_sb_sb_global_handler(struct engine_node *node, void *data) + n_opts->lb_hairpin_use_ct_mark = lb_hairpin_use_ct_mark; + engine_set_node_state(node, EN_UPDATED); + } ++ ++ bool explicit_arp_ns_output = ++ sb_global ++ ? smap_get_bool(&sb_global->options, "arp_ns_explicit_output", ++ false) ++ : false; ++ ++ if (explicit_arp_ns_output != n_opts->explicit_arp_ns_output) { ++ n_opts->explicit_arp_ns_output = explicit_arp_ns_output; ++ engine_set_node_state(node, EN_UPDATED); ++ } ++ + return true; + } + +@@ -3948,6 +3968,7 @@ init_lflow_ctx(struct engine_node *node, + l_ctx_in->localnet_learn_fdb_changed = rt_data->localnet_learn_fdb_changed; + l_ctx_in->chassis_tunnels = &non_vif_data->chassis_tunnels; + l_ctx_in->lb_hairpin_use_ct_mark = n_opts->lb_hairpin_use_ct_mark; ++ l_ctx_in->explicit_arp_ns_output = n_opts->explicit_arp_ns_output; + l_ctx_in->nd_ra_opts = &fo->nd_ra_opts; + l_ctx_in->dhcp_opts = &dhcp_opts->v4_opts; + l_ctx_in->dhcpv6_opts = &dhcp_opts->v6_opts; +@@ -5736,10 +5757,11 @@ main(int argc, char *argv[]) + } + } + +- if (br_int && ovs_feature_set_discovered()) { ++ if (br_int) { + ct_zones_data = engine_get_data(&en_ct_zones); +- if (ct_zones_data && ofctrl_run(br_int, ovs_table, +- &ct_zones_data->pending)) { ++ if (ofctrl_run(br_int, ovs_table, ++ ct_zones_data ? &ct_zones_data->pending ++ : NULL)) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(1, 1); + +@@ -5748,7 +5770,7 @@ main(int argc, char *argv[]) + engine_set_force_recompute(true); + } + +- if (chassis) { ++ if (chassis && ovs_feature_set_discovered()) { + encaps_run(ovs_idl_txn, br_int, + sbrec_chassis_table_get(ovnsb_idl_loop.idl), + chassis, +diff --git a/controller/physical.c b/controller/physical.c +index 7ef259da4..86d4b4578 100644 +--- a/controller/physical.c ++++ b/controller/physical.c +@@ -1631,6 +1631,15 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name, + nested_container = true; + parent_port = lport_lookup_by_name( + sbrec_port_binding_by_name, binding->parent_port); ++ ++ if (parent_port ++ && !lport_can_bind_on_this_chassis(chassis, parent_port)) { ++ /* Even though there is an ofport for this container ++ * parent port, it is requested on different chassis ignore ++ * this container port. 
++ */ ++ return; ++ } + } + } else if (!strcmp(binding->type, "localnet") + || !strcmp(binding->type, "l2gateway")) { +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index 98b29de9f..b2a380437 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -257,9 +257,9 @@ static void pinctrl_handle_put_nd_ra_opts( + struct ofpbuf *continuation); + static void pinctrl_handle_nd_ns(struct rconn *swconn, + const struct flow *ip_flow, +- struct dp_packet *pkt_in, +- const struct match *md, +- struct ofpbuf *userdata); ++ const struct ofputil_packet_in *pin, ++ struct ofpbuf *userdata, ++ const struct ofpbuf *continuation); + static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn, + const struct flow *in_flow, + struct dp_packet *pkt_in, +@@ -660,6 +660,8 @@ pinctrl_forward_pkt(struct rconn *swconn, int64_t dp_key, + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); + put_load(in_port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); + put_load(out_port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts); ++ /* Avoid re-injecting packet already consumed. */ ++ put_load(1, MFF_LOG_FLAGS, MLF_IGMP_IGMP_SNOOP_INJECT_BIT, 1, &ofpacts); + + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->in_port = OFPP_CONTROLLER; +@@ -701,9 +703,6 @@ struct ipv6_prefixd_state { + long long int next_announce; + long long int last_complete; + long long int last_used; +- /* IPv6 PD server info */ +- struct in6_addr server_addr; +- struct eth_addr sa; + /* server_id_info */ + struct { + uint8_t data[DHCPV6_MAX_DUID_LEN]; +@@ -860,12 +859,13 @@ pinctrl_parse_dhcpv6_advt(struct rconn *swconn, const struct flow *ip_flow, + struct dp_packet packet; + + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); +- eth_compose(&packet, ip_flow->dl_src, ip_flow->dl_dst, ETH_TYPE_IPV6, +- IPV6_HEADER_LEN); ++ eth_compose(&packet, (struct eth_addr) ETH_ADDR_C(33,33,00,01,00,02), ++ ip_flow->dl_dst, ETH_TYPE_IPV6, IPV6_HEADER_LEN); + + struct udp_header *udp_h = compose_ipv6(&packet, IPPROTO_UDP, + &ip_flow->ipv6_dst, +- &ip_flow->ipv6_src, 0, 0, 255, ++ &in6addr_all_dhcp_agents, ++ 0, 0, 255, + len + UDP_HEADER_LEN + 4); + udp_h->udp_len = htons(len + UDP_HEADER_LEN + 4); + udp_h->udp_csum = 0; +@@ -919,7 +919,6 @@ out: + static void + pinctrl_prefixd_state_handler(const struct flow *ip_flow, + struct in6_addr addr, unsigned aid, +- struct eth_addr sa, struct in6_addr server_addr, + char prefix_len, unsigned t1, unsigned t2, + unsigned plife_time, unsigned vlife_time, + const uint8_t *uuid, uint8_t uuid_len) +@@ -929,8 +928,6 @@ pinctrl_prefixd_state_handler(const struct flow *ip_flow, + pfd = pinctrl_find_prefixd_state(ip_flow, aid); + if (pfd) { + pfd->state = PREFIX_PENDING; +- pfd->server_addr = server_addr; +- pfd->sa = sa; + memcpy(pfd->uuid.data, uuid, uuid_len); + pfd->uuid.len = uuid_len; + pfd->plife_time = plife_time * 1000; +@@ -948,10 +945,6 @@ pinctrl_parse_dhcpv6_reply(struct dp_packet *pkt_in, + const struct flow *ip_flow) + OVS_REQUIRES(pinctrl_mutex) + { +- struct eth_header *eth = dp_packet_eth(pkt_in); +- struct ovs_16aligned_ip6_hdr *in_ip = dp_packet_l3(pkt_in); +- struct in6_addr ip6_src; +- memcpy(&ip6_src, &in_ip->ip6_src, sizeof ip6_src); + struct udp_header *udp_in = dp_packet_l4(pkt_in); + unsigned char *in_dhcpv6_data = (unsigned char *)(udp_in + 1); + size_t dlen = MIN(ntohs(udp_in->udp_len), dp_packet_l4_size(pkt_in)); +@@ -1034,8 +1027,7 @@ pinctrl_parse_dhcpv6_reply(struct dp_packet *pkt_in, + VLOG_DBG_RL(&rl, "Received DHCPv6 reply from %s with prefix 
%s/%d" + " aid %d", ip6_s, prefix, prefix_len, aid); + } +- pinctrl_prefixd_state_handler(ip_flow, ipv6, aid, eth->eth_src, +- ip6_src, prefix_len, t1, t2, ++ pinctrl_prefixd_state_handler(ip_flow, ipv6, aid, prefix_len, t1, t2, + plife_time, vlife_time, uuid, uuid_len); + } + } +@@ -1068,28 +1060,22 @@ pinctrl_handle_dhcp6_server(struct rconn *swconn, const struct flow *ip_flow, + static void + compose_prefixd_packet(struct dp_packet *b, struct ipv6_prefixd_state *pfd) + { +- struct in6_addr ipv6_dst; +- struct eth_addr eth_dst; +- + int payload = sizeof(struct dhcpv6_opt_server_id) + + sizeof(struct dhcpv6_opt_ia_na); + if (pfd->uuid.len) { + payload += pfd->uuid.len + sizeof(struct dhcpv6_opt_header); +- ipv6_dst = pfd->server_addr; +- eth_dst = pfd->sa; +- } else { +- eth_dst = (struct eth_addr) ETH_ADDR_C(33,33,00,01,00,02); +- ipv6_parse("ff02::1:2", &ipv6_dst); + } + if (ipv6_addr_is_set(&pfd->prefix)) { + payload += sizeof(struct dhcpv6_opt_ia_prefix); + } + +- eth_compose(b, eth_dst, pfd->ea, ETH_TYPE_IPV6, IPV6_HEADER_LEN); ++ eth_compose(b, (struct eth_addr) ETH_ADDR_C(33,33,00,01,00,02), pfd->ea, ++ ETH_TYPE_IPV6, IPV6_HEADER_LEN); + + int len = UDP_HEADER_LEN + 4 + payload; + struct udp_header *udp_h = compose_ipv6(b, IPPROTO_UDP, &pfd->ipv6_addr, +- &ipv6_dst, 0, 0, 255, len); ++ &in6addr_all_dhcp_agents, ++ 0, 0, 255, len); + udp_h->udp_len = htons(len); + udp_h->udp_csum = 0; + packet_set_udp_port(b, htons(546), htons(547)); +@@ -1171,7 +1157,7 @@ ipv6_prefixd_send(struct rconn *swconn, struct ipv6_prefixd_state *pfd) + return pfd->next_announce; + } + +- if (pfd->state == PREFIX_DONE) { ++ if ((pfd->state == PREFIX_PENDING) || (pfd->state == PREFIX_DONE)) { + goto out; + } + +@@ -1222,33 +1208,58 @@ static bool ipv6_prefixd_should_inject(void) + struct ipv6_prefixd_state *pfd = iter->data; + long long int cur_time = time_msec(); + +- if (pfd->state == PREFIX_SOLICIT) { ++ if (pfd->state == PREFIX_SOLICIT || pfd->state == PREFIX_REQUEST) { + return true; + } + if (pfd->state == PREFIX_DONE && + cur_time > pfd->last_complete + pfd->t1) { +- pfd->state = PREFIX_RENEW; + return true; + } + if (pfd->state == PREFIX_RENEW && + cur_time > pfd->last_complete + pfd->t2) { +- pfd->state = PREFIX_REBIND; + pfd->uuid.len = 0; + return true; + } + if (pfd->state == PREFIX_REBIND && + cur_time > pfd->last_complete + pfd->vlife_time) { +- pfd->state = PREFIX_SOLICIT; + return true; + } + } + return false; + } + ++static void ipv6_prefixd_update_state(struct ipv6_prefixd_state *pfd) ++{ ++ long long int cur_time = time_msec(); ++ ++ if (pfd->state == PREFIX_DONE && ++ cur_time > pfd->last_complete + pfd->t1) { ++ pfd->state = PREFIX_RENEW; ++ return; ++ } ++ if (pfd->state == PREFIX_RENEW && ++ cur_time > pfd->last_complete + pfd->t2) { ++ pfd->state = PREFIX_REBIND; ++ pfd->uuid.len = 0; ++ return; ++ } ++ if (pfd->state == PREFIX_REBIND && ++ cur_time > pfd->last_complete + pfd->vlife_time) { ++ pfd->state = PREFIX_SOLICIT; ++ return; ++ } ++} ++ + static void + ipv6_prefixd_wait(long long int timeout) + { +- if (ipv6_prefixd_should_inject()) { ++ /* We need to wake up in all states : ++ * - In SOLICIT and REQUEST states we need to wakeup to handle ++ * next_announce timer. ++ * - In DONE, PENDING, RENEW and REBIND states, we need to wake up to ++ * handle T1, T2 timers. 
++ */ ++ if (!shash_is_empty(&ipv6_prefixd)) { + poll_timer_wait_until(timeout); + } + } +@@ -1266,6 +1277,7 @@ send_ipv6_prefixd(struct rconn *swconn, long long int *send_prefixd_time) + if (*send_prefixd_time > next_msg) { + *send_prefixd_time = next_msg; + } ++ ipv6_prefixd_update_state(pfd); + } + } + +@@ -1464,11 +1476,13 @@ destroy_buffered_packets_ctx(void) + + /* Called with in the pinctrl_handler thread context. */ + static void +-pinctrl_handle_buffered_packets(struct dp_packet *pkt_in, +- const struct match *md, bool is_arp) ++pinctrl_handle_buffered_packets(const struct ofputil_packet_in *pin, ++ const struct ofpbuf *continuation, ++ bool is_arp) + OVS_REQUIRES(pinctrl_mutex) + { + struct in6_addr ip; ++ const struct match *md = &pin->flow_metadata; + uint64_t dp_key = ntohll(md->flow.metadata); + uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0]; + +@@ -1486,20 +1500,7 @@ OVS_REQUIRES(pinctrl_mutex) + return; + } + +- struct ofpbuf ofpacts; +- ofpbuf_init(&ofpacts, 4096); +- reload_metadata(&ofpacts, md); +- /* reload pkt_mark field */ +- const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); +- union mf_value pkt_mark_value; +- mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); +- ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, NULL); +- +- struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); +- resubmit->in_port = OFPP_CONTROLLER; +- resubmit->table_id = OFTABLE_OUTPUT_INIT; +- +- struct packet_data *pd = ovn_packet_data_create(ofpacts, pkt_in); ++ struct packet_data *pd = ovn_packet_data_create(pin, continuation); + ovn_buffered_packets_packet_data_enqueue(bp, pd); + + /* There is a chance that the MAC binding was already created. */ +@@ -1509,8 +1510,8 @@ OVS_REQUIRES(pinctrl_mutex) + /* Called with in the pinctrl_handler thread context. */ + static void + pinctrl_handle_arp(struct rconn *swconn, const struct flow *ip_flow, +- struct dp_packet *pkt_in, +- const struct match *md, struct ofpbuf *userdata) ++ const struct ofputil_packet_in *pin, ++ struct ofpbuf *userdata, const struct ofpbuf *continuation) + { + /* This action only works for IP packets, and the switch should only send + * us IP packets this way, but check here just to be sure. */ +@@ -1522,7 +1523,7 @@ pinctrl_handle_arp(struct rconn *swconn, const struct flow *ip_flow, + } + + ovs_mutex_lock(&pinctrl_mutex); +- pinctrl_handle_buffered_packets(pkt_in, md, true); ++ pinctrl_handle_buffered_packets(pin, continuation, true); + ovs_mutex_unlock(&pinctrl_mutex); + + /* Compose an ARP packet. 
*/ +@@ -1547,7 +1548,8 @@ pinctrl_handle_arp(struct rconn *swconn, const struct flow *ip_flow, + ip_flow->vlans[0].tci); + } + +- set_actions_and_enqueue_msg(swconn, &packet, md, userdata); ++ set_actions_and_enqueue_msg(swconn, &packet, ++ &pin->flow_metadata, userdata); + dp_packet_uninit(&packet); + } + +@@ -3200,8 +3202,7 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + + switch (ntohl(ah->opcode)) { + case ACTION_OPCODE_ARP: +- pinctrl_handle_arp(swconn, &headers, &packet, &pin.flow_metadata, +- &userdata); ++ pinctrl_handle_arp(swconn, &headers, &pin, &userdata, &continuation); + break; + case ACTION_OPCODE_IGMP: + pinctrl_ip_mcast_handle(swconn, &headers, &packet, &pin.flow_metadata, +@@ -3267,8 +3268,7 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + break; + + case ACTION_OPCODE_ND_NS: +- pinctrl_handle_nd_ns(swconn, &headers, &packet, &pin.flow_metadata, +- &userdata); ++ pinctrl_handle_nd_ns(swconn, &headers, &pin, &userdata, &continuation); + break; + + case ACTION_OPCODE_ICMP: +@@ -3519,13 +3519,14 @@ pinctrl_handler(void *arg_) + + rconn_run_wait(swconn); + rconn_recv_wait(swconn); +- send_garp_rarp_wait(send_garp_rarp_time); +- ipv6_ra_wait(send_ipv6_ra_time); +- ip_mcast_querier_wait(send_mcast_query_time); +- svc_monitors_wait(svc_monitors_next_run_time); +- ipv6_prefixd_wait(send_prefixd_time); +- bfd_monitor_wait(bfd_time); +- ++ if (rconn_is_connected(swconn)) { ++ send_garp_rarp_wait(send_garp_rarp_time); ++ ipv6_ra_wait(send_ipv6_ra_time); ++ ip_mcast_querier_wait(send_mcast_query_time); ++ svc_monitors_wait(svc_monitors_next_run_time); ++ ipv6_prefixd_wait(send_prefixd_time); ++ bfd_monitor_wait(bfd_time); ++ } + seq_wait(pinctrl_handler_seq, new_seq); + + latch_wait(&pctrl->pinctrl_thread_exit); +@@ -4269,16 +4270,8 @@ send_mac_binding_buffered_pkts(struct rconn *swconn) + + struct packet_data *pd; + LIST_FOR_EACH_POP (pd, node, &buffered_packets_ctx.ready_packets_data) { +- struct ofputil_packet_out po = { +- .packet = dp_packet_data(pd->p), +- .packet_len = dp_packet_size(pd->p), +- .buffer_id = UINT32_MAX, +- .ofpacts = pd->ofpacts.data, +- .ofpacts_len = pd->ofpacts.size, +- }; +- match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); +- queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); +- ++ queue_msg(swconn, ofputil_encode_resume(&pd->pin, pd->continuation, ++ proto)); + ovn_packet_data_destroy(pd); + } + +@@ -4592,7 +4585,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn, + garp_rarp->announce_time = time_msec() + 1000; + garp_rarp->backoff = 1000; /* msec. */ + } +- } else { ++ } else if (ovnsb_idl_txn) { + add_garp_rarp(name, laddrs->ea, + laddrs->ipv4_addrs[i].addr, + binding_rec->datapath->tunnel_key, +@@ -6212,8 +6205,9 @@ pinctrl_handle_nd_na(struct rconn *swconn, const struct flow *ip_flow, + /* Called with in the pinctrl_handler thread context. */ + static void + pinctrl_handle_nd_ns(struct rconn *swconn, const struct flow *ip_flow, +- struct dp_packet *pkt_in, +- const struct match *md, struct ofpbuf *userdata) ++ const struct ofputil_packet_in *pin, ++ struct ofpbuf *userdata, ++ const struct ofpbuf *continuation) + { + /* This action only works for IPv6 packets. 
*/ + if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) { +@@ -6223,7 +6217,7 @@ pinctrl_handle_nd_ns(struct rconn *swconn, const struct flow *ip_flow, + } + + ovs_mutex_lock(&pinctrl_mutex); +- pinctrl_handle_buffered_packets(pkt_in, md, false); ++ pinctrl_handle_buffered_packets(pin, continuation, false); + ovs_mutex_unlock(&pinctrl_mutex); + + uint64_t packet_stub[128 / 8]; +@@ -6236,7 +6230,8 @@ pinctrl_handle_nd_ns(struct rconn *swconn, const struct flow *ip_flow, + &ip_flow->ipv6_dst); + + /* Reload previous packet metadata and set actions from userdata. */ +- set_actions_and_enqueue_msg(swconn, &packet, md, userdata); ++ set_actions_and_enqueue_msg(swconn, &packet, ++ &pin->flow_metadata, userdata); + dp_packet_uninit(&packet); + } + +@@ -6529,11 +6524,46 @@ struct put_vport_binding { + uint32_t vport_key; + + uint32_t vport_parent_key; ++ ++ /* This vport record Only relevant if "new_record" is true. */ ++ bool new_record; + }; + + /* Contains "struct put_vport_binding"s. */ + static struct hmap put_vport_bindings; + ++/* ++ * Validate if the vport_binding record that was added ++ * by the pinctrl thread is still relevant and needs ++ * to be updated in the SBDB or not. ++ * ++ * vport_binding record is only relevant and needs to be updated in SB if: ++ * 2. The put_vport_binding:new_record is true: ++ * The new_record will be set to "true" when this vport record is created ++ * by function "pinctrl_handle_bind_vport". ++ * ++ * After the first attempt to bind this vport to the chassis and ++ * virtual_parent by function "run_put_vport_bindings" we will set the ++ * value of vpb:new_record to "false" and keep it in "put_vport_bindings" ++ * ++ * After the second attempt of binding the vpb it will be removed by ++ * this function. ++ * ++ * The above guarantees that we will try to bind the vport twice in ++ * a certain amount of time. ++ * ++*/ ++static bool ++is_vport_binding_relevant(struct put_vport_binding *vpb) ++{ ++ ++ if (vpb->new_record) { ++ vpb->new_record = false; ++ return true; ++ } ++ return false; ++} ++ + static void + init_put_vport_bindings(void) + { +@@ -6541,18 +6571,21 @@ init_put_vport_bindings(void) + } + + static void +-flush_put_vport_bindings(void) ++flush_put_vport_bindings(bool force_flush) + { + struct put_vport_binding *vport_b; +- HMAP_FOR_EACH_POP (vport_b, hmap_node, &put_vport_bindings) { +- free(vport_b); ++ HMAP_FOR_EACH_SAFE (vport_b, hmap_node, &put_vport_bindings) { ++ if (!is_vport_binding_relevant(vport_b) || force_flush) { ++ hmap_remove(&put_vport_bindings, &vport_b->hmap_node); ++ free(vport_b); ++ } + } + } + + static void + destroy_put_vport_bindings(void) + { +- flush_put_vport_bindings(); ++ flush_put_vport_bindings(true); + hmap_destroy(&put_vport_bindings); + } + +@@ -6630,7 +6663,7 @@ run_put_vport_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn, + sbrec_port_binding_by_key, chassis, vpb); + } + +- flush_put_vport_bindings(); ++ flush_put_vport_bindings(false); + } + + /* Called with in the pinctrl_handler thread context. 
*/ +@@ -6668,7 +6701,7 @@ pinctrl_handle_bind_vport( + vpb->dp_key = dp_key; + vpb->vport_key = vport_key; + vpb->vport_parent_key = vport_parent_key; +- ++ vpb->new_record = true; + notify_pinctrl_main(); + } + diff --git a/debian/changelog b/debian/changelog index 2f20941be..acf278a15 100644 --- a/debian/changelog @@ -55,3 +859,5264 @@ index 2f20941be..acf278a15 100644 OVN (24.03.1-1) unstable; urgency=low [ OVN team ] * New upstream version +diff --git a/include/ovn/actions.h b/include/ovn/actions.h +index 49fb96fc6..f0d39f147 100644 +--- a/include/ovn/actions.h ++++ b/include/ovn/actions.h +@@ -67,6 +67,7 @@ struct collector_set_ids; + OVNACT(CT_NEXT, ovnact_ct_next) \ + OVNACT(CT_COMMIT_V1, ovnact_ct_commit_v1) \ + OVNACT(CT_COMMIT_V2, ovnact_nest) \ ++ OVNACT(CT_COMMIT_TO_ZONE, ovnact_ct_commit_to_zone) \ + OVNACT(CT_DNAT, ovnact_ct_nat) \ + OVNACT(CT_SNAT, ovnact_ct_nat) \ + OVNACT(CT_DNAT_IN_CZONE, ovnact_ct_nat) \ +@@ -75,7 +76,7 @@ struct collector_set_ids; + OVNACT(CT_LB_MARK, ovnact_ct_lb) \ + OVNACT(SELECT, ovnact_select) \ + OVNACT(CT_CLEAR, ovnact_null) \ +- OVNACT(CT_COMMIT_NAT, ovnact_ct_commit_nat) \ ++ OVNACT(CT_COMMIT_NAT, ovnact_ct_commit_to_zone) \ + OVNACT(CLONE, ovnact_nest) \ + OVNACT(ARP, ovnact_nest) \ + OVNACT(ICMP4, ovnact_nest) \ +@@ -296,11 +297,12 @@ struct ovnact_ct_nat { + uint8_t ltable; /* Logical table ID of next table. */ + }; + +-/* OVNACT_CT_COMMIT_NAT. */ +-struct ovnact_ct_commit_nat { ++/* OVNACT_CT_COMMIT_TO_ZONE, OVNACT_CT_COMMIT_NAT. */ ++struct ovnact_ct_commit_to_zone { + struct ovnact ovnact; + + bool dnat_zone; ++ bool do_nat; + uint8_t ltable; + }; + +@@ -847,6 +849,9 @@ struct ovnact_encode_params { + /* The datapath key. */ + uint32_t dp_key; + ++ /* Indication if we should add explicit output after arp/nd_ns action. */ ++ bool explicit_arp_ns_output; ++ + /* OVN maps each logical flow table (ltable), one-to-one, onto a physical + * OpenFlow flow table (ptable). A number of parameters describe this + * mapping and data related to flow tables: +diff --git a/include/ovn/features.h b/include/ovn/features.h +index 08f1d8288..35a5d8ba0 100644 +--- a/include/ovn/features.h ++++ b/include/ovn/features.h +@@ -28,6 +28,7 @@ + #define OVN_FEATURE_FDB_TIMESTAMP "fdb-timestamp" + #define OVN_FEATURE_LS_DPG_COLUMN "ls-dpg-column" + #define OVN_FEATURE_CT_COMMIT_NAT_V2 "ct-commit-nat-v2" ++#define OVN_FEATURE_CT_COMMIT_TO_ZONE "ct-commit-to-zone" + + /* OVS datapath supported features. Based on availability OVN might generate + * different types of openflows. 
+diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h +index ce79b501c..8854dae7a 100644 +--- a/include/ovn/logical-fields.h ++++ b/include/ovn/logical-fields.h +@@ -82,6 +82,7 @@ enum mff_log_flags_bits { + MLF_LOCALNET_BIT = 15, + MLF_RX_FROM_TUNNEL_BIT = 16, + MLF_ICMP_SNAT_BIT = 17, ++ MLF_IGMP_IGMP_SNOOP_INJECT_BIT = 18, + }; + + /* MFF_LOG_FLAGS_REG flag assignments */ +@@ -137,6 +138,8 @@ enum mff_log_flags { + MLF_RX_FROM_TUNNEL = (1 << MLF_RX_FROM_TUNNEL_BIT), + + MLF_ICMP_SNAT = (1 << MLF_ICMP_SNAT_BIT), ++ ++ MLF_IGMP_IGMP_SNOOP = (1 << MLF_IGMP_IGMP_SNOOP_INJECT_BIT), + }; + + /* OVN logical fields +diff --git a/lib/acl-log.c b/lib/acl-log.c +index 9530dd763..b3eb4bbd0 100644 +--- a/lib/acl-log.c ++++ b/lib/acl-log.c +@@ -34,7 +34,9 @@ log_verdict_to_string(uint8_t verdict) + return "drop"; + } else if (verdict == LOG_VERDICT_REJECT) { + return "reject"; +- } else { ++ } else if (verdict == LOG_VERDICT_PASS) { ++ return "pass"; ++ } else { + return ""; + } + } +diff --git a/lib/acl-log.h b/lib/acl-log.h +index da7fa2f02..3973a8e0b 100644 +--- a/lib/acl-log.h ++++ b/lib/acl-log.h +@@ -33,6 +33,7 @@ enum log_verdict { + LOG_VERDICT_ALLOW, + LOG_VERDICT_DROP, + LOG_VERDICT_REJECT, ++ LOG_VERDICT_PASS, + LOG_VERDICT_UNKNOWN = UINT8_MAX + }; + +diff --git a/lib/actions.c b/lib/actions.c +index a45874dfb..2d9629209 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -985,6 +985,45 @@ parse_ct_nat(struct action_context *ctx, const char *name, + } + } + ++static void ++parse_ct_commit_to_zone(struct action_context *ctx, const char *name, ++ bool do_nat, bool require_param, ++ struct ovnact_ct_commit_to_zone *cn) ++{ ++ add_prerequisite(ctx, "ip"); ++ ++ if (ctx->pp->cur_ltable >= ctx->pp->n_tables) { ++ lexer_error(ctx->lexer, ++ "\"%s\" action not allowed in last table.", name); ++ return; ++ } ++ ++ cn->ltable = ctx->pp->cur_ltable + 1; ++ cn->do_nat = do_nat; ++ cn->dnat_zone = true; ++ ++ if (require_param) { ++ lexer_force_match(ctx->lexer, LEX_T_LPAREN); ++ } else { ++ if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) { ++ return; ++ } ++ } ++ ++ if (lexer_match_id(ctx->lexer, "dnat")) { ++ cn->dnat_zone = true; ++ } else if (lexer_match_id(ctx->lexer, "snat")) { ++ cn->dnat_zone = false; ++ } else { ++ lexer_error(ctx->lexer, "\"%s\" action accepts" ++ " only \"dnat\" or \"snat\" parameter.", name); ++ return; ++ } ++ ++ lexer_force_match(ctx->lexer, LEX_T_RPAREN); ++} ++ ++ + static void + parse_CT_DNAT(struct action_context *ctx) + { +@@ -1016,33 +1055,17 @@ parse_CT_SNAT_IN_CZONE(struct action_context *ctx) + static void + parse_CT_COMMIT_NAT(struct action_context *ctx) + { +- add_prerequisite(ctx, "ip"); +- +- if (ctx->pp->cur_ltable >= ctx->pp->n_tables) { +- lexer_error(ctx->lexer, +- "\"ct_commit_nat\" action not allowed in last table."); +- return; +- } +- +- struct ovnact_ct_commit_nat *cn = ovnact_put_CT_COMMIT_NAT(ctx->ovnacts); +- cn->ltable = ctx->pp->cur_ltable + 1; +- cn->dnat_zone = true; +- +- if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) { +- return; +- } +- +- if (lexer_match_id(ctx->lexer, "dnat")) { +- cn->dnat_zone = true; +- } else if (lexer_match_id(ctx->lexer, "snat")) { +- cn->dnat_zone = false; +- } else { +- lexer_error(ctx->lexer, "\"ct_commit_nat\" action accepts" +- " only \"dnat\" or \"snat\" parameter."); +- return; +- } ++ parse_ct_commit_to_zone(ctx, "ct_commit_nat", ++ true, false, ++ ovnact_put_CT_COMMIT_NAT(ctx->ovnacts)); ++} + +- lexer_force_match(ctx->lexer, LEX_T_RPAREN); ++static void ++parse_CT_COMMIT_TO_ZONE(struct 
action_context *ctx) ++{ ++ parse_ct_commit_to_zone(ctx, "ct_commit_to_zone", ++ false, true, ++ ovnact_put_CT_COMMIT_TO_ZONE(ctx->ovnacts)); + } + + static void +@@ -1095,12 +1118,20 @@ format_CT_SNAT_IN_CZONE(const struct ovnact_ct_nat *cn, struct ds *s) + } + + static void +-format_CT_COMMIT_NAT(const struct ovnact_ct_commit_nat *cn, struct ds *s) ++format_CT_COMMIT_NAT(const struct ovnact_ct_commit_to_zone *cn, struct ds *s) + { + ds_put_cstr(s, "ct_commit_nat"); + ds_put_cstr(s, cn->dnat_zone ? "(dnat);" : "(snat);"); + } + ++static void ++format_CT_COMMIT_TO_ZONE(const struct ovnact_ct_commit_to_zone *cn, ++ struct ds *s) ++{ ++ ds_put_cstr(s, "ct_commit_to_zone"); ++ ds_put_cstr(s, cn->dnat_zone ? "(dnat);" : "(snat);"); ++} ++ + static void + encode_ct_nat(const struct ovnact_ct_nat *cn, + const struct ovnact_encode_params *ep, +@@ -1170,6 +1201,39 @@ encode_ct_nat(const struct ovnact_ct_nat *cn, + ofpbuf_push_uninit(ofpacts, ct_offset); + } + ++static void ++encode_ct_commit_to_zone(const struct ovnact_ct_commit_to_zone *cn, ++ const struct ovnact_encode_params *ep, ++ struct ofpbuf *ofpacts) ++{ ++ const size_t ct_offset = ofpacts->size; ++ ++ struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts); ++ ct->recirc_table = cn->ltable + first_ptable(ep, ep->pipeline); ++ ct->zone_src.ofs = 0; ++ ct->zone_src.n_bits = 16; ++ ct->flags = NX_CT_F_COMMIT; ++ ct->alg = 0; ++ ++ if (ep->is_switch) { ++ ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE); ++ } else { ++ ct->zone_src.field = mf_from_id(cn->dnat_zone ++ ? MFF_LOG_DNAT_ZONE ++ : MFF_LOG_SNAT_ZONE); ++ } ++ ++ if (cn->do_nat) { ++ struct ofpact_nat *nat = ofpact_put_NAT(ofpacts); ++ nat->range_af = AF_UNSPEC; ++ nat->flags = 0; ++ } ++ ++ ct = ofpbuf_at_assert(ofpacts, ct_offset, sizeof *ct); ++ ofpacts->header = ct; ++ ofpact_finish_CT(ofpacts, &ct); ++} ++ + static void + encode_CT_DNAT(const struct ovnact_ct_nat *cn, + const struct ovnact_encode_params *ep, +@@ -1203,34 +1267,19 @@ encode_CT_SNAT_IN_CZONE(const struct ovnact_ct_nat *cn, + } + + static void +-encode_CT_COMMIT_NAT(const struct ovnact_ct_commit_nat *cn, ++encode_CT_COMMIT_NAT(const struct ovnact_ct_commit_to_zone *cn, + const struct ovnact_encode_params *ep, + struct ofpbuf *ofpacts) + { +- const size_t ct_offset = ofpacts->size; +- +- struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts); +- ct->recirc_table = cn->ltable + first_ptable(ep, ep->pipeline); +- ct->zone_src.ofs = 0; +- ct->zone_src.n_bits = 16; +- ct->flags = NX_CT_F_COMMIT; +- ct->alg = 0; +- +- if (ep->is_switch) { +- ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE); +- } else { +- ct->zone_src.field = mf_from_id(cn->dnat_zone +- ? 
MFF_LOG_DNAT_ZONE +- : MFF_LOG_SNAT_ZONE); +- } +- +- struct ofpact_nat *nat = ofpact_put_NAT(ofpacts); +- nat->range_af = AF_UNSPEC; +- nat->flags = 0; ++ encode_ct_commit_to_zone(cn, ep, ofpacts); ++} + +- ct = ofpbuf_at_assert(ofpacts, ct_offset, sizeof *ct); +- ofpacts->header = ct; +- ofpact_finish_CT(ofpacts, &ct); ++static void ++encode_CT_COMMIT_TO_ZONE(const struct ovnact_ct_commit_to_zone *cn, ++ const struct ovnact_encode_params *ep, ++ struct ofpbuf *ofpacts) ++{ ++ encode_ct_commit_to_zone(cn, ep, ofpacts); + } + + static void +@@ -1239,7 +1288,7 @@ ovnact_ct_nat_free(struct ovnact_ct_nat *ct_nat OVS_UNUSED) + } + + static void +-ovnact_ct_commit_nat_free(struct ovnact_ct_commit_nat *cn OVS_UNUSED) ++ovnact_ct_commit_to_zone_free(struct ovnact_ct_commit_to_zone *cn OVS_UNUSED) + { + } + +@@ -1951,6 +2000,44 @@ format_REJECT(const struct ovnact_nest *nest, struct ds *s) + format_nested_action(nest, "reject", s); + } + ++static bool ++is_paused_nested_action(enum action_opcode opcode) ++{ ++ switch (opcode) { ++ case ACTION_OPCODE_ARP: ++ case ACTION_OPCODE_ND_NS: ++ return true; ++ case ACTION_OPCODE_IGMP: ++ case ACTION_OPCODE_PUT_ARP: ++ case ACTION_OPCODE_PUT_DHCP_OPTS: ++ case ACTION_OPCODE_ND_NA: ++ case ACTION_OPCODE_ND_NA_ROUTER: ++ case ACTION_OPCODE_PUT_ND: ++ case ACTION_OPCODE_PUT_FDB: ++ case ACTION_OPCODE_PUT_DHCPV6_OPTS: ++ case ACTION_OPCODE_DNS_LOOKUP: ++ case ACTION_OPCODE_LOG: ++ case ACTION_OPCODE_PUT_ND_RA_OPTS: ++ case ACTION_OPCODE_ICMP: ++ case ACTION_OPCODE_ICMP4_ERROR: ++ case ACTION_OPCODE_ICMP6_ERROR: ++ case ACTION_OPCODE_TCP_RESET: ++ case ACTION_OPCODE_SCTP_ABORT: ++ case ACTION_OPCODE_REJECT: ++ case ACTION_OPCODE_PUT_ICMP4_FRAG_MTU: ++ case ACTION_OPCODE_PUT_ICMP6_FRAG_MTU: ++ case ACTION_OPCODE_EVENT: ++ case ACTION_OPCODE_BIND_VPORT: ++ case ACTION_OPCODE_DHCP6_SERVER: ++ case ACTION_OPCODE_HANDLE_SVC_CHECK: ++ case ACTION_OPCODE_BFD_MSG: ++ case ACTION_OPCODE_ACTIVATION_STRATEGY_RARP: ++ case ACTION_OPCODE_MG_SPLIT_BUF: ++ default: ++ return false; ++ } ++} ++ + static void + encode_nested_actions(const struct ovnact_nest *on, + const struct ovnact_encode_params *ep, +@@ -1966,7 +2053,8 @@ encode_nested_actions(const struct ovnact_nest *on, + * converted to OpenFlow, as its userdata. ovn-controller will convert the + * packet to ARP or NA and then send the packet and actions back to the + * switch inside an OFPT_PACKET_OUT message. 
*/ +- size_t oc_offset = encode_start_controller_op(opcode, false, ++ bool pause = is_paused_nested_action(opcode); ++ size_t oc_offset = encode_start_controller_op(opcode, pause, + ep->ctrl_meter_id, ofpacts); + ofpacts_put_openflow_actions(inner_ofpacts.data, inner_ofpacts.size, + ofpacts, OFP15_VERSION); +@@ -1982,6 +2070,9 @@ encode_ARP(const struct ovnact_nest *on, + struct ofpbuf *ofpacts) + { + encode_nested_actions(on, ep, ACTION_OPCODE_ARP, ofpacts); ++ if (!ep->explicit_arp_ns_output) { ++ emit_resubmit(ofpacts, ep->output_ptable); ++ } + } + + static void +@@ -2070,6 +2161,9 @@ encode_ND_NS(const struct ovnact_nest *on, + struct ofpbuf *ofpacts) + { + encode_nested_actions(on, ep, ACTION_OPCODE_ND_NS, ofpacts); ++ if (!ep->explicit_arp_ns_output) { ++ emit_resubmit(ofpacts, ep->output_ptable); ++ } + } + + static void +@@ -3570,6 +3664,8 @@ parse_log_arg(struct action_context *ctx, struct ovnact_log *log) + log->verdict = LOG_VERDICT_REJECT; + } else if (lexer_match_id(ctx->lexer, "allow")) { + log->verdict = LOG_VERDICT_ALLOW; ++ } else if (lexer_match_id(ctx->lexer, "pass")) { ++ log->verdict = LOG_VERDICT_PASS; + } else { + lexer_syntax_error(ctx->lexer, "unknown verdict"); + return; +@@ -5423,6 +5519,8 @@ parse_action(struct action_context *ctx) + parse_CT_NEXT(ctx); + } else if (lexer_match_id(ctx->lexer, "ct_commit")) { + parse_CT_COMMIT(ctx); ++ } else if (lexer_match_id(ctx->lexer, "ct_commit_to_zone")) { ++ parse_CT_COMMIT_TO_ZONE(ctx); + } else if (lexer_match_id(ctx->lexer, "ct_dnat")) { + parse_CT_DNAT(ctx); + } else if (lexer_match_id(ctx->lexer, "ct_snat")) { +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index 20219a67a..68892dba5 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -139,6 +139,10 @@ ovn_init_symtab(struct shash *symtab) + flags_str); + snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_RX_FROM_TUNNEL_BIT); + expr_symtab_add_subfield(symtab, "flags.tunnel_rx", NULL, flags_str); ++ snprintf(flags_str, sizeof flags_str, "flags[%d]", ++ MLF_IGMP_IGMP_SNOOP_INJECT_BIT); ++ expr_symtab_add_subfield(symtab, "flags.igmp_loopback", NULL, ++ flags_str); + + /* Connection tracking state. 
*/ + expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false, +diff --git a/lib/ovn-l7.c b/lib/ovn-l7.c +index 3a5f3f3ec..2ddb68cb0 100644 +--- a/lib/ovn-l7.c ++++ b/lib/ovn-l7.c +@@ -18,6 +18,8 @@ + + #include "ovn-l7.h" + ++const struct in6_addr in6addr_all_dhcp_agents = IN6ADDR_ALL_DHCP_AGENTS_INIT; ++ + bool + ipv6_addr_is_routable_multicast(const struct in6_addr *ip) + { +diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h +index ad514a922..f4a30cc00 100644 +--- a/lib/ovn-l7.h ++++ b/lib/ovn-l7.h +@@ -305,6 +305,12 @@ BUILD_ASSERT_DECL(DHCP_OPT_HEADER_LEN == sizeof(struct dhcp_opt_header)); + #define DHCPV6_FQDN_FLAGS_O 1 << 1 + #define DHCPV6_FQDN_FLAGS_N 1 << 2 + ++extern const struct in6_addr in6addr_all_dhcp_agents; ++#define IN6ADDR_ALL_DHCP_AGENTS_INIT { { { 0xff,0x02,0x00,0x00,0x00,0x00, \ ++ 0x00,0x00,0x00,0x00,0x00,0x00, \ ++ 0x00,0x01,0x00,0x02 } } } ++ ++ + #define DHCP6_OPT_HEADER_LEN 4 + OVS_PACKED( + struct dhcpv6_opt_header { +diff --git a/lib/ovn-util.c b/lib/ovn-util.c +index ee5cbcdc3..9f97ae2ca 100644 +--- a/lib/ovn-util.c ++++ b/lib/ovn-util.c +@@ -693,13 +693,17 @@ uint32_t + ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min, + uint32_t max, uint32_t *hint) + { +- for (uint32_t tnlid = next_tnlid(*hint, min, max); tnlid != *hint; +- tnlid = next_tnlid(tnlid, min, max)) { ++ /* Normalize hint, because it can be outside of [min, max]. */ ++ *hint = next_tnlid(*hint, min, max); ++ ++ uint32_t tnlid = *hint; ++ do { + if (ovn_add_tnlid(set, tnlid)) { + *hint = tnlid; + return tnlid; + } +- } ++ tnlid = next_tnlid(tnlid, min, max); ++ } while (tnlid != *hint); + + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name); +diff --git a/northd/en-global-config.c b/northd/en-global-config.c +index 34e393b33..28c78a12c 100644 +--- a/northd/en-global-config.c ++++ b/northd/en-global-config.c +@@ -370,6 +370,7 @@ northd_enable_all_features(struct ed_type_global_config *data) + .fdb_timestamp = true, + .ls_dpg_column = true, + .ct_commit_nat_v2 = true, ++ .ct_commit_to_zone = true, + }; + } + +@@ -439,6 +440,15 @@ build_chassis_features(const struct sbrec_chassis_table *sbrec_chassis_table, + chassis_features->ct_commit_nat_v2) { + chassis_features->ct_commit_nat_v2 = false; + } ++ ++ bool ct_commit_to_zone = ++ smap_get_bool(&chassis->other_config, ++ OVN_FEATURE_CT_COMMIT_TO_ZONE, ++ false); ++ if (!ct_commit_to_zone && ++ chassis_features->ct_commit_to_zone) { ++ chassis_features->ct_commit_to_zone = false; ++ } + } + } + +@@ -553,6 +563,10 @@ update_sb_config_options_to_sbrec(struct ed_type_global_config *config_data, + smap_replace(options, "sbctl_probe_interval", sip); + } + ++ /* Adds indication that northd is handling explicit output after ++ * arp/nd_ns action. 
*/ ++ smap_add(options, "arp_ns_explicit_output", "true"); ++ + if (!smap_equal(&sb->options, options)) { + sbrec_sb_global_set_options(sb, options); + } +diff --git a/northd/en-global-config.h b/northd/en-global-config.h +index 38d732808..842bcee70 100644 +--- a/northd/en-global-config.h ++++ b/northd/en-global-config.h +@@ -20,6 +20,7 @@ struct chassis_features { + bool fdb_timestamp; + bool ls_dpg_column; + bool ct_commit_nat_v2; ++ bool ct_commit_to_zone; + }; + + struct global_config_tracked_data { +diff --git a/northd/northd.c b/northd/northd.c +index 0f95578a3..8f20c4be3 100644 +--- a/northd/northd.c ++++ b/northd/northd.c +@@ -4668,6 +4668,11 @@ fail: + static bool + lr_changes_can_be_handled(const struct nbrec_logical_router *lr) + { ++ /* We can't do I-P processing when the router is disabled. */ ++ if (!lrouter_is_enabled(lr)) { ++ return false; ++ } ++ + /* Check if the columns are changed in this row. */ + enum nbrec_logical_router_column_id col; + for (col = 0; col < NBREC_LOGICAL_ROUTER_N_COLUMNS; col++) { +@@ -5357,11 +5362,13 @@ ovn_igmp_group_get_ports(const struct sbrec_igmp_group *sb_igmp_group, + continue; + } + +- /* If this is already a port of a router on which relay is enabled, +- * skip it for the group. Traffic is flooded there anyway. ++ /* If this is already a port of a router on which relay is enabled ++ * and it's not a transit switch to router port, skip it for the ++ * group. Traffic is flooded there anyway. + */ + if (port->peer && port->peer->od && +- port->peer->od->mcast_info.rtr.relay) { ++ port->peer->od->mcast_info.rtr.relay && ++ !ovn_datapath_is_transit_switch(port->od)) { + continue; + } + +@@ -6069,7 +6076,8 @@ build_interconn_mcast_snoop_flows(struct ovn_datapath *od, + continue; + } + /* Punt IGMP traffic to controller. */ +- char *match = xasprintf("inport == %s && igmp", op->json_key); ++ char *match = xasprintf("inport == %s && igmp && " ++ "flags.igmp_loopback == 0", op->json_key); + ovn_lflow_metered(lflows, od, S_SWITCH_OUT_PRE_LB, 120, match, + "clone { igmp; }; next;", + copp_meter_get(COPP_IGMP, od->nbs->copp, +@@ -6078,7 +6086,8 @@ build_interconn_mcast_snoop_flows(struct ovn_datapath *od, + free(match); + + /* Punt MLD traffic to controller. */ +- match = xasprintf("inport == %s && (mldv1 || mldv2)", op->json_key); ++ match = xasprintf("inport == %s && (mldv1 || mldv2) && " ++ "flags.igmp_loopback == 0", op->json_key); + ovn_lflow_metered(lflows, od, S_SWITCH_OUT_PRE_LB, 120, match, + "clone { igmp; }; next;", + copp_meter_get(COPP_IGMP, od->nbs->copp, +@@ -9263,14 +9272,15 @@ build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od, + ds_put_cstr(actions, "igmp;"); + /* Punt IGMP traffic to controller. */ + ovn_lflow_metered(lflows, od, S_SWITCH_IN_L2_LKUP, 100, +- "igmp", ds_cstr(actions), ++ "flags.igmp_loopback == 0 && igmp", ds_cstr(actions), + copp_meter_get(COPP_IGMP, od->nbs->copp, + meter_groups), + lflow_ref); + + /* Punt MLD traffic to controller. */ + ovn_lflow_metered(lflows, od, S_SWITCH_IN_L2_LKUP, 100, +- "mldv1 || mldv2", ds_cstr(actions), ++ "flags.igmp_loopback == 0 && (mldv1 || mldv2)", ++ ds_cstr(actions), + copp_meter_get(COPP_IGMP, od->nbs->copp, + meter_groups), + lflow_ref); +@@ -9338,8 +9348,16 @@ build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od, + } + + +-/* Ingress table 25: Add IP multicast flows learnt from IGMP/MLD +- * (priority 90). */ ++/* Ingress table 27: Add IP multicast flows learnt from IGMP/MLD ++ * (priority 90). 
++ * ++ * OR, for transit switches: ++ * ++ * Add IP multicast flows learnt from IGMP/MLD to forward traffic ++ * explicitly to the ports that are part of the IGMP/MLD group, ++ * and ignore MROUTER Ports. ++ * (priority 90). ++ */ + static void + build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group, + struct lflow_table *lflows, +@@ -9353,6 +9371,9 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group, + ds_clear(match); + ds_clear(actions); + ++ bool transit_switch = ++ ovn_datapath_is_transit_switch(igmp_group->datapath); ++ + struct mcast_switch_info *mcast_sw_info = + &igmp_group->datapath->mcast_info.sw; + uint64_t table_size = mcast_sw_info->table_size; +@@ -9398,7 +9419,7 @@ build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group, + } + + /* Also flood traffic to all multicast routers with relay enabled. */ +- if (mcast_sw_info->flood_relay) { ++ if (mcast_sw_info->flood_relay && !transit_switch) { + ds_put_cstr(actions, + "clone { " + "outport = \""MC_MROUTER_FLOOD "\"; " +@@ -10059,19 +10080,30 @@ build_ecmp_routing_policy_flows(struct lflow_table *lflows, + lflow_ref); + } + ++ if (!n_valid_nexthops) { ++ goto cleanup; ++ } ++ + ds_clear(&actions); +- ds_put_format(&actions, "%s = %"PRIu16 +- "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id, +- REG_ECMP_MEMBER_ID); ++ if (n_valid_nexthops > 1) { ++ ds_put_format(&actions, "%s = %"PRIu16 ++ "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id, ++ REG_ECMP_MEMBER_ID); ++ ++ for (size_t i = 0; i < n_valid_nexthops; i++) { ++ if (i > 0) { ++ ds_put_cstr(&actions, ", "); ++ } + +- for (size_t i = 0; i < n_valid_nexthops; i++) { +- if (i > 0) { +- ds_put_cstr(&actions, ", "); ++ ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]); + } +- +- ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]); ++ ds_put_cstr(&actions, ");"); ++ } else { ++ ds_put_format(&actions, "%s = %"PRIu16 ++ "; %s = %"PRIuSIZE"; next;", REG_ECMP_GROUP_ID, ++ ecmp_group_id, REG_ECMP_MEMBER_ID, ++ valid_nexthops[0]); + } +- ds_put_cstr(&actions, ");"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY, + rule->priority, rule->match, + ds_cstr(&actions), &rule->header_, +@@ -11349,10 +11381,11 @@ copy_ra_to_sb(struct ovn_port *op, const char *address_mode) + ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen); + } + +- const char *ra_pd_list = smap_get(&op->sb->options, "ipv6_ra_pd_list"); +- if (ra_pd_list) { +- ds_put_cstr(&s, ra_pd_list); ++ for (size_t i = 0; i < op->nbrp->n_ipv6_prefix; i++) { ++ ds_put_cstr(&s, op->nbrp->ipv6_prefix[i]); ++ ds_put_char(&s, ' '); + } ++ + /* Remove trailing space */ + ds_chomp(&s, ' '); + smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s)); +@@ -13381,7 +13414,7 @@ build_arp_request_flows_for_lrouter( + "ip6.dst = %s; " + "nd.target = %s; " + "output; " +- "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s, ++ "}; output;", ETH_ADDR_ARGS(eth_dst), sn_addr_s, + route->nexthop); + + ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200, +@@ -13401,7 +13434,7 @@ build_arp_request_flows_for_lrouter( + "arp.tpa = " REG_NEXT_HOP_IPV4 "; " + "arp.op = 1; " /* ARP request */ + "output; " +- "};", ++ "}; output;", + copp_meter_get(COPP_ARP_RESOLVE, od->nbr->copp, + meter_groups), + lflow_ref); +@@ -13410,7 +13443,7 @@ build_arp_request_flows_for_lrouter( + "nd_ns { " + "nd.target = " REG_NEXT_HOP_IPV6 "; " + "output; " +- "};", ++ "}; output;", + copp_meter_get(COPP_ND_NS_RESOLVE, od->nbr->copp, + meter_groups), + lflow_ref); +@@ -14385,19 +14418,26 @@ 
build_lrouter_out_is_dnat_local(struct lflow_table *lflows, + static void + build_lrouter_out_snat_match(struct lflow_table *lflows, + const struct ovn_datapath *od, +- const struct nbrec_nat *nat, struct ds *match, +- bool distributed_nat, int cidr_bits, bool is_v6, ++ const struct nbrec_nat *nat, ++ struct ds *match, ++ bool distributed_nat, int cidr_bits, ++ bool is_v6, + struct ovn_port *l3dgw_port, +- struct lflow_ref *lflow_ref) ++ struct lflow_ref *lflow_ref, ++ bool is_reverse) + { + ds_clear(match); + +- ds_put_format(match, "ip && ip%c.src == %s", is_v6 ? '6' : '4', ++ ds_put_format(match, "ip && ip%c.%s == %s", ++ is_v6 ? '6' : '4', ++ is_reverse ? "dst" : "src", + nat->logical_ip); + + if (!od->is_gw_router) { + /* Distributed router. */ +- ds_put_format(match, " && outport == %s", l3dgw_port->json_key); ++ ds_put_format(match, " && %s == %s", ++ is_reverse ? "inport" : "outport", ++ l3dgw_port->json_key); + if (od->n_l3dgw_ports) { + ds_put_format(match, " && is_chassis_resident(\"%s\")", + distributed_nat +@@ -14408,7 +14448,7 @@ build_lrouter_out_snat_match(struct lflow_table *lflows, + + if (nat->allowed_ext_ips || nat->exempted_ext_ips) { + lrouter_nat_add_ext_ip_match(od, lflows, match, nat, +- is_v6, false, cidr_bits, ++ is_v6, is_reverse, cidr_bits, + lflow_ref); + } + } +@@ -14435,7 +14475,8 @@ build_lrouter_out_snat_stateless_flow(struct lflow_table *lflows, + uint16_t priority = cidr_bits + 1; + + build_lrouter_out_snat_match(lflows, od, nat, match, distributed_nat, +- cidr_bits, is_v6, l3dgw_port, lflow_ref); ++ cidr_bits, is_v6, l3dgw_port, lflow_ref, ++ false); + + if (!od->is_gw_router) { + /* Distributed router. */ +@@ -14482,7 +14523,7 @@ build_lrouter_out_snat_in_czone_flow(struct lflow_table *lflows, + + build_lrouter_out_snat_match(lflows, od, nat, match, distributed_nat, + cidr_bits, is_v6, l3dgw_port, +- lflow_ref); ++ lflow_ref, false); + + if (od->n_l3dgw_ports) { + priority += 128; +@@ -14531,7 +14572,8 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows, + struct ds *actions, bool distributed_nat, + struct eth_addr mac, int cidr_bits, bool is_v6, + struct ovn_port *l3dgw_port, +- struct lflow_ref *lflow_ref) ++ struct lflow_ref *lflow_ref, ++ const struct chassis_features *features) + { + if (strcmp(nat->type, "snat") && strcmp(nat->type, "dnat_and_snat")) { + return; +@@ -14545,7 +14587,9 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows, + uint16_t priority = cidr_bits + 1; + + build_lrouter_out_snat_match(lflows, od, nat, match, distributed_nat, +- cidr_bits, is_v6, l3dgw_port, lflow_ref); ++ cidr_bits, is_v6, l3dgw_port, lflow_ref, ++ false); ++ size_t original_match_len = match->length; + + if (!od->is_gw_router) { + /* Distributed router. */ +@@ -14570,6 +14614,35 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows, + priority, ds_cstr(match), + ds_cstr(actions), &nat->header_, + lflow_ref); ++ ++ /* For the SNAT networks, we need to make sure that connections are ++ * properly tracked so we can decide whether to perform SNAT on traffic ++ * exiting the network. */ ++ if (features->ct_commit_to_zone && !strcmp(nat->type, "snat") && ++ !od->is_gw_router) { ++ /* For traffic that comes from SNAT network, initiate CT state before ++ * entering S_ROUTER_OUT_SNAT to allow matching on various CT states. 
++ */ ++ ds_truncate(match, original_match_len); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 70, ++ ds_cstr(match), "ct_snat;", ++ lflow_ref); ++ ++ build_lrouter_out_snat_match(lflows, od, nat, match, ++ distributed_nat, cidr_bits, is_v6, ++ l3dgw_port, lflow_ref, true); ++ ++ /* New traffic that goes into SNAT network is committed to CT to avoid ++ * SNAT-ing replies.*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, priority, ++ ds_cstr(match), "ct_snat;", ++ lflow_ref); ++ ++ ds_put_cstr(match, " && ct.new"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_SNAT, priority, ++ ds_cstr(match), "ct_commit_to_zone(snat);", ++ lflow_ref); ++ } + } + + static void +@@ -15108,7 +15181,7 @@ build_lrouter_nat_defrag_and_lb( + } else { + build_lrouter_out_snat_flow(lflows, od, nat, match, actions, + distributed_nat, mac, cidr_bits, is_v6, +- l3dgw_port, lflow_ref); ++ l3dgw_port, lflow_ref, features); + } + + /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */ +@@ -15310,7 +15383,7 @@ build_routable_flows_for_router_port( + } + + if (lrp->nbrp->ha_chassis_group || +- lrp->nbrp->n_gateway_chassis) { ++ lrp->nbrp->n_gateway_chassis || lrp->od->is_gw_router) { + for (size_t j = 0; j < ra.n_addrs; j++) { + struct lport_addresses *laddrs = &ra.laddrs[j]; + for (size_t k = 0; k < laddrs->n_ipv4_addrs; k++) { +diff --git a/northd/northd.h b/northd/northd.h +index 3f1cd8341..5e9fa4745 100644 +--- a/northd/northd.h ++++ b/northd/northd.h +@@ -362,6 +362,12 @@ ovn_datapath_is_stale(const struct ovn_datapath *od) + return !od->nbr && !od->nbs; + }; + ++static inline bool ++ovn_datapath_is_transit_switch(const struct ovn_datapath *od) ++{ ++ return od->tunnel_key >= OVN_MIN_DP_KEY_GLOBAL; ++} ++ + /* Pipeline stages. */ + + /* The two purposes for which ovn-northd uses OVN logical datapaths. */ +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index 9583abeff..474f54017 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -1933,13 +1933,20 @@ output; + + +
+- Priority-90 flows that forward registered IP multicast traffic to
+- their corresponding multicast group, which ovn-northd
+- creates based on learnt
+- entries. The flows also forward packets to the
+- MC_MROUTER_FLOOD multicast group, which
+- ovn-nortdh populates with all the logical ports that
+- are connected to logical routers with
++ Priority-90 flows for transit switches that forward registered
++ IP multicast traffic to their corresponding multicast group , which
++ ovn-northd creates based on learnt
++ entries.
+
+
+
++ Priority-90 flows for non-transit switches that forward registered
++ IP multicast traffic to their corresponding multicast group, which
++ ovn-northd creates based on learnt
++ entries. The flows
++ also forward packets to the MC_MROUTER_FLOOD multicast
++ group, which ovn-nortdh populates with all the logical
++ ports that are connected to logical routers with
+ :mcast_relay='true'.
+
+
+@@ -4865,6 +4872,13 @@ nd_ns {
+

    +