diff --git a/SOURCES/0001-Support-configuring-Load-Balancer-hairpin-source-IP.patch b/SOURCES/0001-Support-configuring-Load-Balancer-hairpin-source-IP.patch new file mode 100644 index 0000000..f135d78 --- /dev/null +++ b/SOURCES/0001-Support-configuring-Load-Balancer-hairpin-source-IP.patch @@ -0,0 +1,762 @@ +From 8788ac191a4e0689f0287695c181fe1a781b0d31 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Fri, 15 Jan 2021 19:26:13 +0100 +Subject: [PATCH 1/2] Support configuring Load Balancer hairpin source IP. + +In case traffic that gets load balanced is DNAT-ed to a backend IP that +happens to be the source of the traffic then OVN performs an additional +SNAT to ensure that return traffic is directed through OVN. + +Until now the load balancer VIP was chosen as SNAT IP. However, in +specific scenarios, the CMS may prefer a different IP, e.g., a single +cluster-wide IP. This commit adds support, through the newly added +Load_Balancer.options 'hairpin_snat_ip', to allow the CMS to explicitly +choose a SNAT IP. + +Due to the fact that now traffic that was hairpinned might need to be +SNAT-ed to different IPs for different load balancers that share the +same VIP address value we need to also explicitly match on L4 protocol +and ports in the 'OFTABLE_CT_SNAT_FOR_VIP' table. 
+ +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit cc4d5520064f294d2b011be10ec5ff5f1a85bfd0) + +Conflicts: + NEWS + +Change-Id: Ie5ba5d9f3811ee577377e3e2cd700d2949a174da +--- + NEWS | 3 + + controller/lflow.c | 53 +++++++++------ + lib/lb.c | 26 ++++++++ + lib/lb.h | 11 ++++ + lib/ovn-util.c | 21 ++++++ + lib/ovn-util.h | 1 + + northd/ovn-northd.c | 9 +-- + ovn-nb.xml | 8 +++ + ovn-sb.ovsschema | 9 ++- + ovn-sb.xml | 8 +++ + tests/ovn-northd.at | 6 ++ + tests/ovn.at | 182 ++++++++++++++++++++++++++++++++++++++-------------- + 12 files changed, 261 insertions(+), 76 deletions(-) + +diff --git a/NEWS b/NEWS +index e89c5f4..57a9ba9 100644 +--- a/NEWS ++++ b/NEWS +@@ -10,6 +10,9 @@ Post-v20.12.0 + "ovn-installed". This external-id is set by ovn-controller only after all + openflow operations corresponding to the OVS interface being added have + been processed. ++ - Add a new option to Load_Balancer.options, "hairpin_snat_ip", to allow ++ users to explicitly select which source IP should be used for load ++ balancer hairpin traffic. + + OVN v20.12.0 - 18 Dec 2020 + -------------------------- +diff --git a/controller/lflow.c b/controller/lflow.c +index 9f6aece..946c1e0 100644 +--- a/controller/lflow.c ++++ b/controller/lflow.c +@@ -1189,26 +1189,30 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + struct match hairpin_reply_match = MATCH_CATCHALL_INITIALIZER; + + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { +- ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_backend->ip); ++ ovs_be32 bip4 = in6_addr_get_mapped_ipv4(&lb_backend->ip); ++ ovs_be32 vip4 = lb->hairpin_snat_ips.n_ipv4_addrs ++ ? 
lb->hairpin_snat_ips.ipv4_addrs[0].addr ++ : in6_addr_get_mapped_ipv4(&lb_vip->vip); + + match_set_dl_type(&hairpin_match, htons(ETH_TYPE_IP)); +- match_set_nw_src(&hairpin_match, ip4); +- match_set_nw_dst(&hairpin_match, ip4); +- +- match_set_dl_type(&hairpin_reply_match, +- htons(ETH_TYPE_IP)); +- match_set_nw_src(&hairpin_reply_match, ip4); +- match_set_nw_dst(&hairpin_reply_match, +- in6_addr_get_mapped_ipv4(&lb_vip->vip)); ++ match_set_nw_src(&hairpin_match, bip4); ++ match_set_nw_dst(&hairpin_match, bip4); ++ ++ match_set_dl_type(&hairpin_reply_match, htons(ETH_TYPE_IP)); ++ match_set_nw_src(&hairpin_reply_match, bip4); ++ match_set_nw_dst(&hairpin_reply_match, vip4); + } else { ++ struct in6_addr *bip6 = &lb_backend->ip; ++ struct in6_addr *vip6 = lb->hairpin_snat_ips.n_ipv6_addrs ++ ? &lb->hairpin_snat_ips.ipv6_addrs[0].addr ++ : &lb_vip->vip; + match_set_dl_type(&hairpin_match, htons(ETH_TYPE_IPV6)); +- match_set_ipv6_src(&hairpin_match, &lb_backend->ip); +- match_set_ipv6_dst(&hairpin_match, &lb_backend->ip); ++ match_set_ipv6_src(&hairpin_match, bip6); ++ match_set_ipv6_dst(&hairpin_match, bip6); + +- match_set_dl_type(&hairpin_reply_match, +- htons(ETH_TYPE_IPV6)); +- match_set_ipv6_src(&hairpin_reply_match, &lb_backend->ip); +- match_set_ipv6_dst(&hairpin_reply_match, &lb_vip->vip); ++ match_set_dl_type(&hairpin_reply_match, htons(ETH_TYPE_IPV6)); ++ match_set_ipv6_src(&hairpin_reply_match, bip6); ++ match_set_ipv6_dst(&hairpin_reply_match, vip6); + } + + if (lb_backend->port) { +@@ -1254,6 +1258,7 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + static void + add_lb_ct_snat_vip_flows(struct ovn_controller_lb *lb, + struct ovn_lb_vip *lb_vip, ++ uint8_t lb_proto, + struct ovn_desired_flow_table *flow_table) + { + uint64_t stub[1024 / 8]; +@@ -1277,10 +1282,16 @@ add_lb_ct_snat_vip_flows(struct ovn_controller_lb *lb, + + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { + nat->range_af = AF_INET; +- nat->range.addr.ipv4.min = 
in6_addr_get_mapped_ipv4(&lb_vip->vip); ++ nat->range.addr.ipv4.min = ++ lb->hairpin_snat_ips.n_ipv4_addrs ++ ? lb->hairpin_snat_ips.ipv4_addrs[0].addr ++ : in6_addr_get_mapped_ipv4(&lb_vip->vip); + } else { + nat->range_af = AF_INET6; +- nat->range.addr.ipv6.min = lb_vip->vip; ++ nat->range.addr.ipv6.min ++ = lb->hairpin_snat_ips.n_ipv6_addrs ++ ? lb->hairpin_snat_ips.ipv6_addrs[0].addr ++ : lb_vip->vip; + } + ofpacts.header = ofpbuf_push_uninit(&ofpacts, nat_offset); + ofpact_finish(&ofpacts, &ct->ofpact); +@@ -1288,12 +1299,16 @@ add_lb_ct_snat_vip_flows(struct ovn_controller_lb *lb, + struct match match = MATCH_CATCHALL_INITIALIZER; + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { + match_set_dl_type(&match, htons(ETH_TYPE_IP)); +- match_set_ct_nw_dst(&match, nat->range.addr.ipv4.min); ++ match_set_ct_nw_dst(&match, in6_addr_get_mapped_ipv4(&lb_vip->vip)); + } else { + match_set_dl_type(&match, htons(ETH_TYPE_IPV6)); + match_set_ct_ipv6_dst(&match, &lb_vip->vip); + } + ++ match_set_nw_proto(&match, lb_proto); ++ match_set_ct_nw_proto(&match, lb_proto); ++ match_set_ct_tp_dst(&match, htons(lb_vip->vip_port)); ++ + uint32_t ct_state = OVS_CS_F_TRACKED | OVS_CS_F_DST_NAT; + match_set_ct_state_masked(&match, ct_state, ct_state); + +@@ -1349,7 +1364,7 @@ consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb, + flow_table); + } + +- add_lb_ct_snat_vip_flows(lb, lb_vip, flow_table); ++ add_lb_ct_snat_vip_flows(lb, lb_vip, lb_proto, flow_table); + } + + ovn_controller_lb_destroy(lb); +diff --git a/lib/lb.c b/lib/lb.c +index 2517c02..e11ac00 100644 +--- a/lib/lb.c ++++ b/lib/lb.c +@@ -170,6 +170,24 @@ void ovn_northd_lb_vip_destroy(struct ovn_northd_lb_vip *vip) + free(vip->backends_nb); + } + ++static void ++ovn_lb_get_hairpin_snat_ip(const struct uuid *lb_uuid, ++ const struct smap *lb_options, ++ struct lport_addresses *hairpin_addrs) ++{ ++ const char *addresses = smap_get(lb_options, "hairpin_snat_ip"); ++ ++ if (!addresses) { ++ return; ++ } ++ ++ if 
(!extract_ip_address(addresses, hairpin_addrs)) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad hairpin_snat_ip %s in load balancer "UUID_FMT, ++ addresses, UUID_ARGS(lb_uuid)); ++ } ++} ++ + struct ovn_northd_lb * + ovn_northd_lb_create(const struct nbrec_load_balancer *nbrec_lb, + struct hmap *ports, +@@ -224,6 +242,9 @@ ovn_northd_lb_create(const struct nbrec_load_balancer *nbrec_lb, + ds_chomp(&sel_fields, ','); + lb->selection_fields = ds_steal_cstr(&sel_fields); + } ++ ++ ovn_lb_get_hairpin_snat_ip(&nbrec_lb->header_.uuid, &nbrec_lb->options, ++ &lb->hairpin_snat_ips); + return lb; + } + +@@ -260,6 +281,7 @@ ovn_northd_lb_destroy(struct ovn_northd_lb *lb) + free(lb->vips); + free(lb->vips_nb); + free(lb->selection_fields); ++ destroy_lport_addresses(&lb->hairpin_snat_ips); + free(lb->dps); + free(lb); + } +@@ -289,6 +311,9 @@ ovn_controller_lb_create(const struct sbrec_load_balancer *sbrec_lb) + * correct value. + */ + lb->n_vips = n_vips; ++ ++ ovn_lb_get_hairpin_snat_ip(&sbrec_lb->header_.uuid, &sbrec_lb->options, ++ &lb->hairpin_snat_ips); + return lb; + } + +@@ -299,5 +324,6 @@ ovn_controller_lb_destroy(struct ovn_controller_lb *lb) + ovn_lb_vip_destroy(&lb->vips[i]); + } + free(lb->vips); ++ destroy_lport_addresses(&lb->hairpin_snat_ips); + free(lb); + } +diff --git a/lib/lb.h b/lib/lb.h +index 42c580b..dfce51c 100644 +--- a/lib/lb.h ++++ b/lib/lb.h +@@ -20,6 +20,7 @@ + #include + #include + #include "openvswitch/hmap.h" ++#include "ovn-util.h" + + struct nbrec_load_balancer; + struct sbrec_load_balancer; +@@ -37,6 +38,11 @@ struct ovn_northd_lb { + struct ovn_northd_lb_vip *vips_nb; + size_t n_vips; + ++ struct lport_addresses hairpin_snat_ips; /* IP (v4 and/or v6) to be used ++ * as source for hairpinned ++ * traffic. 
++ */ ++ + size_t n_dps; + size_t n_allocated_dps; + const struct sbrec_datapath_binding **dps; +@@ -89,6 +95,11 @@ struct ovn_controller_lb { + + struct ovn_lb_vip *vips; + size_t n_vips; ++ ++ struct lport_addresses hairpin_snat_ips; /* IP (v4 and/or v6) to be used ++ * as source for hairpinned ++ * traffic. ++ */ + }; + + struct ovn_controller_lb *ovn_controller_lb_create( +diff --git a/lib/ovn-util.c b/lib/ovn-util.c +index 2136f90..b647106 100644 +--- a/lib/ovn-util.c ++++ b/lib/ovn-util.c +@@ -232,6 +232,27 @@ extract_ip_addresses(const char *address, struct lport_addresses *laddrs) + return false; + } + ++/* Extracts at most one IPv4 and at most one IPv6 address from 'address' ++ * which should be of the format 'IP1 [IP2]'. ++ * ++ * Return true if at most one IPv4 address and at most one IPv6 address ++ * is found in 'address'. IPs must be host IPs, i.e., no unmasked bits. ++ * ++ * The caller must call destroy_lport_addresses(). ++ */ ++bool extract_ip_address(const char *address, struct lport_addresses *laddrs) ++{ ++ if (!extract_ip_addresses(address, laddrs) || ++ laddrs->n_ipv4_addrs > 1 || ++ laddrs->n_ipv6_addrs > 1 || ++ (laddrs->n_ipv4_addrs && laddrs->ipv4_addrs[0].plen != 32) || ++ (laddrs->n_ipv6_addrs && laddrs->ipv6_addrs[0].plen != 128)) { ++ destroy_lport_addresses(laddrs); ++ return false; ++ } ++ return true; ++} ++ + /* Extracts the mac, IPv4 and IPv6 addresses from the + * "nbrec_logical_router_port" parameter 'lrp'. 
Stores the IPv4 and + * IPv6 addresses in the 'ipv4_addrs' and 'ipv6_addrs' fields of +diff --git a/lib/ovn-util.h b/lib/ovn-util.h +index 679f47a..a711363 100644 +--- a/lib/ovn-util.h ++++ b/lib/ovn-util.h +@@ -72,6 +72,7 @@ bool extract_addresses(const char *address, struct lport_addresses *, + int *ofs); + bool extract_lsp_addresses(const char *address, struct lport_addresses *); + bool extract_ip_addresses(const char *address, struct lport_addresses *); ++bool extract_ip_address(const char *address, struct lport_addresses *); + bool extract_lrp_networks(const struct nbrec_logical_router_port *, + struct lport_addresses *); + bool extract_sbrec_binding_first_mac(const struct sbrec_port_binding *binding, +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 62d45f9..d9bcd6f 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -3606,6 +3606,7 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths, + sbrec_load_balancer_set_name(lb->slb, lb->nlb->name); + sbrec_load_balancer_set_vips(lb->slb, &lb->nlb->vips); + sbrec_load_balancer_set_protocol(lb->slb, lb->nlb->protocol); ++ sbrec_load_balancer_set_options(lb->slb, &lb->nlb->options); + sbrec_load_balancer_set_datapaths( + lb->slb, (struct sbrec_datapath_binding **)lb->dps, + lb->n_dps); +@@ -8593,15 +8594,10 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type, + return false; + } + +- if (!extract_ip_addresses(addresses, laddrs) || +- laddrs->n_ipv4_addrs > 1 || +- laddrs->n_ipv6_addrs > 1 || +- (laddrs->n_ipv4_addrs && laddrs->ipv4_addrs[0].plen != 32) || +- (laddrs->n_ipv6_addrs && laddrs->ipv6_addrs[0].plen != 128)) { ++ if (!extract_ip_address(addresses, laddrs)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"", + addresses, UUID_ARGS(&od->key)); +- destroy_lport_addresses(laddrs); + return false; + } + +@@ -13852,6 +13848,7 @@ main(int argc, char *argv[]) + 
add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_name); + add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_vips); + add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_protocol); ++ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_options); + add_column_noalert(ovnsb_idl_loop.idl, + &sbrec_load_balancer_col_external_ids); + +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 105d869..86aa438 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -1644,6 +1644,14 @@ + Please note using --reject option will disable empty_lb + SB controller event for this load balancer. + ++ ++ ++ IP to be used as source IP for packets that have been hair-pinned ++ after load balancing. The default behavior when the option is not set ++ is to use the load balancer VIP as source IP. This option may have ++ exactly one IPv4 and/or one IPv6 address on it, separated by a space ++ character. ++ + + + +diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema +index b418434..0d20f08 100644 +--- a/ovn-sb.ovsschema ++++ b/ovn-sb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Southbound", +- "version": "20.14.0", +- "cksum": "1412040198 25748", ++ "version": "20.15.0", ++ "cksum": "539683023 25965", + "tables": { + "SB_Global": { + "columns": { +@@ -482,6 +482,11 @@ + "type": {"key": {"type": "uuid", + "refTable": "Datapath_Binding"}, + "min": 0, "max": "unlimited"}}, ++ "options": { ++ "type": {"key": "string", ++ "value": "string", ++ "min": 0, ++ "max": "unlimited"}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 980a096..2f251bd 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -4238,6 +4238,14 @@ tcp.flags = RST; + Datapaths to which this load balancer applies to. + + ++ ++ ++ IP to be used as source IP for packets that have been hair-pinned after ++ load balancing. This value is automatically populated by ++ ovn-northd. 
++ ++ ++ + + + See External IDs at the beginning of this document. +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index d52aeed..c02c5d7 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2131,6 +2131,12 @@ echo + echo "__file__:__line__: check that datapath sw1 has lb0 and lb1 set in the load_balancers column." + check_column "$lb0_uuid $lb1_uuid" sb:datapath_binding load_balancers external_ids:name=sw1 + ++ ++echo ++echo "__file__:__line__: Set hairpin_snat_ip on lb1 and check that SB DB is updated." ++check ovn-nbctl --wait=sb set Load_Balancer lb1 options:hairpin_snat_ip="42.42.42.42 4242::4242" ++check_column "$lb1_uuid" sb:load_balancer _uuid name=lb1 options='{hairpin_snat_ip="42.42.42.42 4242::4242"}' ++ + echo + echo "__file__:__line__: Delete load balancer lb1 an check that datapath sw1's load_balancers are updated accordingly." + +diff --git a/tests/ovn.at b/tests/ovn.at +index 9f2e152..14072ec 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -20742,8 +20742,9 @@ build_tcp_syn() { + + send_ipv4_pkt() { + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 +- local ip_src=$5 ip_dst=$6 ip_proto=$7 ip_len=$8 ip_chksum=$9 +- local l4_payload=${10} ++ local ip_src=$5 ip_dst=$6 ip_proto=$7 ip_len=$8 ++ local l4_payload=$9 ++ local hp_ip_src=${10} + local hp_l4_payload=${11} + local outfile=${12} + +@@ -20751,8 +20752,10 @@ send_ipv4_pkt() { + + local eth=${eth_dst}${eth_src}0800 + local hp_eth=${eth_src}${eth_dst}0800 +- local ip=4500${ip_len}00004000${ip_ttl}${ip_proto}${ip_chksum}${ip_src}${ip_dst} +- local hp_ip=4500${ip_len}00004000${ip_ttl}${ip_proto}${ip_chksum}${ip_dst}${ip_src} ++ local ip=4500${ip_len}00004000${ip_ttl}${ip_proto}0000${ip_src}${ip_dst} ++ ip=$(ip4_csum_inplace $ip) ++ local hp_ip=4500${ip_len}00004000${ip_ttl}${ip_proto}0000${hp_ip_src}${ip_src} ++ hp_ip=$(ip4_csum_inplace ${hp_ip}) + local packet=${eth}${ip}${l4_payload} + local hp_packet=${hp_eth}${hp_ip}${hp_l4_payload} + +@@ -20764,15 +20767,16 @@ send_ipv6_pkt() 
{ + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 + local ip_src=$5 ip_dst=$6 ip_proto=$7 ip_len=$8 + local l4_payload=$9 +- local hp_l4_payload=${10} +- local outfile=${11} ++ local hp_ip_src=${10} ++ local hp_l4_payload=${11} ++ local outfile=${12} + + local ip_ttl=40 + + local eth=${eth_dst}${eth_src}86dd + local hp_eth=${eth_src}${eth_dst}86dd + local ip=60000000${ip_len}${ip_proto}${ip_ttl}${ip_src}${ip_dst} +- local hp_ip=60000000${ip_len}${ip_proto}${ip_ttl}${ip_dst}${ip_src} ++ local hp_ip=60000000${ip_len}${ip_proto}${ip_ttl}${hp_ip_src}${ip_src} + local packet=${eth}${ip}${l4_payload} + local hp_packet=${hp_eth}${hp_ip}${hp_l4_payload} + +@@ -20814,18 +20818,35 @@ ovn-nbctl lsp-add sw sw-rtr \ + + ovn-nbctl --wait=hv sync + +-# Inject IPv4 TCP packet from lsp. ++ovn-sbctl dump-flows > sbflows ++AT_CAPTURE_FILE([sbflows]) + > expected ++ ++# Inject IPv4 TCP packet from lsp. + tcp_payload=$(build_tcp_syn 84d0 1f90 05a7) + hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 156e) + send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ +- 06 0028 35f5 \ +- ${tcp_payload} ${hp_tcp_payload} \ ++ 06 0028 \ ++ ${tcp_payload} \ ++ $(ip_to_hex 88 88 88 88) ${hp_tcp_payload} \ + expected + +-ovn-sbctl dump-flows > sbflows +-AT_CAPTURE_FILE([sbflows]) ++# Check that traffic is hairpinned. ++OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) ++ ++# Change LB Hairpin SNAT IP. ++# Also flush conntrack to avoid reusing an existing entry. ++as hv1 ovs-appctl dpctl/flush-conntrack ++ovn-nbctl --wait=hv set load_balancer lb-ipv4-tcp options:hairpin_snat_ip="88.88.88.87" ++# Inject IPv4 TCP packet from lsp. ++hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 156f) ++send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ ++ $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ ++ 06 0028 \ ++ ${tcp_payload} \ ++ $(ip_to_hex 88 88 88 87) ${hp_tcp_payload} \ ++ expected + + # Check that traffic is hairpinned. 
+ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) +@@ -20835,8 +20856,25 @@ udp_payload=$(build_udp 84d0 0fc8 6666) + hp_udp_payload=$(build_udp 84d0 07e5 6e49) + send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ +- 11 001e 35f4 \ +- ${udp_payload} ${hp_udp_payload} \ ++ 11 001e \ ++ ${udp_payload} \ ++ $(ip_to_hex 88 88 88 88) ${hp_udp_payload} \ ++ expected ++ ++# Check that traffic is hairpinned. ++OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) ++ ++# Change LB Hairpin SNAT IP. ++# Also flush conntrack to avoid reusing an existing entry. ++as hv1 ovs-appctl dpctl/flush-conntrack ++ovn-nbctl --wait=hv set load_balancer lb-ipv4-udp options:hairpin_snat_ip="88.88.88.87" ++# Inject IPv4 UDP packet from lsp. ++hp_udp_payload=$(build_udp 84d0 07e5 6e4a) ++send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ ++ $(ip_to_hex 42 42 42 1) $(ip_to_hex 88 88 88 88) \ ++ 11 001e \ ++ ${udp_payload} \ ++ $(ip_to_hex 88 88 88 87) ${hp_udp_payload} \ + expected + + # Check that traffic is hairpinned. +@@ -20848,7 +20886,25 @@ hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc0) + send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + 42000000000000000000000000000001 88000000000000000000000000000088 \ + 06 0014 \ +- ${tcp_payload} ${hp_tcp_payload} \ ++ ${tcp_payload} \ ++ 88000000000000000000000000000088 ${hp_tcp_payload} \ ++ expected ++ ++# Check that traffic is hairpinned. ++OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) ++ ++# Change LB Hairpin SNAT IP. ++# Also flush conntrack to avoid reusing an existing entry. ++as hv1 ovs-appctl dpctl/flush-conntrack ++ovn-nbctl --wait=hv set load_balancer lb-ipv6-tcp options:hairpin_snat_ip="8800::0087" ++ ++# Inject IPv6 TCP packet from lsp. 
++hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc1) ++send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ ++ 42000000000000000000000000000001 88000000000000000000000000000088 \ ++ 06 0014 \ ++ ${tcp_payload} \ ++ 88000000000000000000000000000087 ${hp_tcp_payload} \ + expected + + # Check that traffic is hairpinned. +@@ -20860,12 +20916,27 @@ hp_udp_payload=$(build_udp 84d0 07e5 a89b) + send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + 42000000000000000000000000000001 88000000000000000000000000000088 \ + 11 000a \ +- ${udp_payload} ${hp_udp_payload} \ ++ ${udp_payload} \ ++ 88000000000000000000000000000088 ${hp_udp_payload} \ + expected + +-# Check that traffic is hairpinned. ++# Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Change LB Hairpin SNAT IP. ++# Also flush conntrack to avoid reusing an existing entry. ++as hv1 ovs-appctl dpctl/flush-conntrack ++ovn-nbctl --wait=hv set load_balancer lb-ipv6-udp options:hairpin_snat_ip="8800::0087" ++ ++# Inject IPv6 UDP packet from lsp. 
++hp_udp_payload=$(build_udp 84d0 07e5 a89b) ++send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ ++ 42000000000000000000000000000001 88000000000000000000000000000088 \ ++ 11 000a \ ++ ${udp_payload} \ ++ 88000000000000000000000000000087 ${hp_udp_payload} \ ++ expected ++ + OVN_CLEANUP([hv1]) + AT_CLEANUP + +@@ -23156,7 +23227,7 @@ priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 a + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68], [0], [dnl +@@ -23190,8 +23261,8 @@ priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68], [0], [dnl +@@ -23227,8 +23298,8 @@ priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | 
cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv4-udp +@@ -23256,8 +23327,9 @@ priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 a + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +@@ -23275,8 +23347,9 @@ priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 a + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | 
grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv6-tcp +@@ -23306,9 +23379,10 @@ priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 a + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 
actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +@@ -23328,9 +23402,10 @@ priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 a + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv6-udp +@@ -23362,9 +23437,11 @@ priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 ac + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl 
+-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +@@ -23386,9 +23463,11 @@ priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 ac + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 
actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw1 lb-ipv6-udp +@@ -23423,10 +23502,12 @@ priority=100,udp6,metadata=0x2,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 ac + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) 
++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +@@ -23449,10 +23530,12 @@ priority=100,udp6,metadata=0x2,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 ac + ]) + + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) 
++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + as hv2 ovs-vsctl del-port hv2-vif1 +@@ -23501,9 +23584,10 @@ priority=100,udp6,metadata=0x2,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 ac + ]) + + AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ipv6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ip,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 
actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) + ]) + + check ovn-nbctl --wait=hv ls-del sw0 +-- +1.8.3.1 + diff --git a/SOURCES/0001-bfd-introduce-IPv6-support.patch b/SOURCES/0001-bfd-introduce-IPv6-support.patch new file mode 100644 index 0000000..f219ccd --- /dev/null +++ b/SOURCES/0001-bfd-introduce-IPv6-support.patch @@ -0,0 +1,261 @@ +From 9f42e93b6a25bff87074156586505a6e8968f8cb Mon Sep 17 00:00:00 2001 +Message-Id: <9f42e93b6a25bff87074156586505a6e8968f8cb.1610538323.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Tue, 12 Jan 2021 13:10:56 +0100 +Subject: [PATCH] bfd: introduce IPv6 support + +Introduce IPv6 support to ovn controller BFD implementation + +Signed-off-by: Lorenzo Bianconi +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique +--- + NEWS | 2 +- + controller/pinctrl.c | 112 ++++++++++++++++++++++++++++--------------- + tests/system-ovn.at | 16 ++++++- + 3 files changed, 89 insertions(+), 41 deletions(-) + +--- a/NEWS ++++ b/NEWS +@@ -2,7 +2,7 @@ Post-v20.12.0 + ------------------------- + - Support ECMP multiple nexthops for reroute router policies. + - BFD protocol support according to RFC880 [0]. Introduce next-hop BFD +- availability check for OVN static routes. IPv6 is not suported yet. ++ availability check for OVN static routes. 
+ [0] https://tools.ietf.org/html/rfc5880) + + OVN v20.12.0 - 18 Dec 2020 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -6393,10 +6393,10 @@ struct bfd_entry { + + /* L2 source address */ + struct eth_addr src_mac; +- /* IPv4 source address */ +- ovs_be32 ip_src; +- /* IPv4 destination address */ +- ovs_be32 ip_dst; ++ /* IP source address */ ++ struct in6_addr ip_src; ++ /* IP destination address */ ++ struct in6_addr ip_dst; + /* RFC 5881 section 4 + * The source port MUST be in the range 49152 through 65535. + * The same UDP source port number MUST be used for all BFD +@@ -6458,20 +6458,17 @@ pinctrl_find_bfd_monitor_entry_by_port(c + } + + static struct bfd_entry * +-pinctrl_find_bfd_monitor_entry_by_disc(ovs_be32 ip, ovs_be32 disc) ++pinctrl_find_bfd_monitor_entry_by_disc(char *ip, ovs_be32 disc) + { +- char *ip_src = xasprintf(IP_FMT, IP_ARGS(ip)); + struct bfd_entry *ret = NULL, *entry; + +- HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip_src, 0), ++ HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), + &bfd_monitor_map) { + if (entry->local_disc == disc) { + ret = entry; + break; + } + } +- +- free(ip_src); + return ret; + } + +@@ -6501,33 +6498,28 @@ static void + bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet, + bool final) + { +- struct udp_header *udp; +- struct bfd_msg *msg; ++ int payload_len = sizeof(struct udp_header) + sizeof(struct bfd_msg); + + /* Properly align after the ethernet header */ + dp_packet_reserve(packet, 2); +- struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); +- eth->eth_dst = eth_addr_broadcast; +- eth->eth_src = entry->src_mac; +- eth->eth_type = htons(ETH_TYPE_IP); +- +- struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); +- ip->ip_ihl_ver = IP_IHL_VER(5, 4); +- ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); +- ip->ip_ttl = MAXTTL; +- ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; +- ip->ip_proto = IPPROTO_UDP; +- 
put_16aligned_be32(&ip->ip_src, entry->ip_src); +- put_16aligned_be32(&ip->ip_dst, entry->ip_dst); +- /* Checksum has already been zeroed by put_zeros call. */ +- ip->ip_csum = csum(ip, sizeof *ip); ++ if (IN6_IS_ADDR_V4MAPPED(&entry->ip_src)) { ++ ovs_be32 ip_src = in6_addr_get_mapped_ipv4(&entry->ip_src); ++ ovs_be32 ip_dst = in6_addr_get_mapped_ipv4(&entry->ip_dst); ++ pinctrl_compose_ipv4(packet, entry->src_mac, eth_addr_broadcast, ++ ip_src, ip_dst, IPPROTO_UDP, MAXTTL, payload_len); ++ } else { ++ pinctrl_compose_ipv6(packet, entry->src_mac, eth_addr_broadcast, ++ &entry->ip_src, &entry->ip_dst, IPPROTO_UDP, ++ MAXTTL, payload_len); ++ } + +- udp = dp_packet_put_zeros(packet, sizeof *udp); ++ struct udp_header *udp = dp_packet_put_zeros(packet, sizeof *udp); ++ udp->udp_len = htons(payload_len); ++ udp->udp_csum = 0; + udp->udp_src = htons(entry->udp_src); + udp->udp_dst = htons(BFD_DEST_PORT); +- udp->udp_len = htons(sizeof *udp + sizeof *msg); + +- msg = dp_packet_put_zeros(packet, sizeof *msg); ++ struct bfd_msg *msg = dp_packet_put_zeros(packet, sizeof *msg); + msg->vers_diag = (BFD_VERSION << 5); + msg->mult = entry->local_mult; + msg->length = BFD_PACKET_LEN; +@@ -6538,6 +6530,17 @@ bfd_monitor_put_bfd_msg(struct bfd_entry + /* min_tx and min_rx are in us - RFC 5880 page 9 */ + msg->min_tx = htonl(entry->local_min_tx * 1000); + msg->min_rx = htonl(entry->local_min_rx * 1000); ++ ++ if (!IN6_IS_ADDR_V4MAPPED(&entry->ip_src)) { ++ /* IPv6 needs UDP checksum calculated */ ++ uint32_t csum = packet_csum_pseudoheader6(dp_packet_l3(packet)); ++ int len = (uint8_t *)udp - (uint8_t *)dp_packet_eth(packet); ++ csum = csum_continue(csum, udp, dp_packet_size(packet) - len); ++ udp->udp_csum = csum_finish(csum); ++ if (!udp->udp_csum) { ++ udp->udp_csum = htons(0xffff); ++ } ++ } + } + + static void +@@ -6736,9 +6739,18 @@ pinctrl_handle_bfd_msg(struct rconn *swc + return; + } + ++ char *ip_src; ++ if (ip_flow->dl_type == htons(ETH_TYPE_IP)) { ++ ip_src = 
normalize_ipv4_prefix(ip_flow->nw_src, 32); ++ } else { ++ ip_src = normalize_ipv6_prefix(&ip_flow->ipv6_src, 128); ++ } ++ + const struct bfd_msg *msg = dp_packet_get_udp_payload(pkt_in); +- struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_disc( +- ip_flow->nw_src, msg->your_disc); ++ struct bfd_entry *entry = ++ pinctrl_find_bfd_monitor_entry_by_disc(ip_src, msg->your_disc); ++ free(ip_src); ++ + if (!entry) { + return; + } +@@ -6821,10 +6833,21 @@ static void + bfd_monitor_check_sb_conf(const struct sbrec_bfd *sb_bt, + struct bfd_entry *entry) + { +- ovs_be32 ip_dst; ++ struct lport_addresses dst_addr; ++ ++ if (extract_ip_addresses(sb_bt->dst_ip, &dst_addr)) { ++ struct in6_addr addr; ++ ++ if (dst_addr.n_ipv6_addrs > 0) { ++ addr = dst_addr.ipv6_addrs[0].addr; ++ } else { ++ addr = in6_addr_mapped_ipv4(dst_addr.ipv4_addrs[0].addr); ++ } + +- if (ip_parse(sb_bt->dst_ip, &ip_dst) && ip_dst != entry->ip_dst) { +- entry->ip_dst = ip_dst; ++ if (!ipv6_addr_equals(&addr, &entry->ip_dst)) { ++ entry->ip_dst = addr; ++ } ++ destroy_lport_addresses(&dst_addr); + } + + if (sb_bt->min_tx != entry->local_min_tx) { +@@ -6889,11 +6912,15 @@ bfd_monitor_run(struct ovsdb_idl_txn *ov + entry = pinctrl_find_bfd_monitor_entry_by_port( + bt->dst_ip, bt->src_port); + if (!entry) { +- ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); + struct eth_addr ea = eth_addr_zero; ++ struct lport_addresses dst_addr; ++ struct in6_addr ip_src, ip_dst; + int i; + +- if (!ip_parse(bt->dst_ip, &ip_dst)) { ++ ip_dst = in6_addr_mapped_ipv4(htonl(BFD_DEFAULT_DST_IP)); ++ ip_src = in6_addr_mapped_ipv4(htonl(BFD_DEFAULT_SRC_IP)); ++ ++ if (!extract_ip_addresses(bt->dst_ip, &dst_addr)) { + continue; + } + +@@ -6905,13 +6932,20 @@ bfd_monitor_run(struct ovsdb_idl_txn *ov + } + + ea = laddrs.ea; +- if (laddrs.n_ipv4_addrs > 0) { +- ip_src = laddrs.ipv4_addrs[0].addr; ++ if (dst_addr.n_ipv6_addrs > 0 && laddrs.n_ipv6_addrs > 0) { ++ ip_dst = dst_addr.ipv6_addrs[0].addr; ++ ip_src = 
laddrs.ipv6_addrs[0].addr; ++ destroy_lport_addresses(&laddrs); ++ break; ++ } else if (laddrs.n_ipv4_addrs > 0) { ++ ip_dst = in6_addr_mapped_ipv4(dst_addr.ipv4_addrs[0].addr); ++ ip_src = in6_addr_mapped_ipv4(laddrs.ipv4_addrs[0].addr); + destroy_lport_addresses(&laddrs); + break; + } + destroy_lport_addresses(&laddrs); + } ++ destroy_lport_addresses(&dst_addr); + + if (eth_addr_is_zero(ea)) { + continue; +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -5563,7 +5563,7 @@ check ovn-nbctl ls-add public + + check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 + check ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 +-check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24 \ ++check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24 1000::a/64 \ + -- lrp-set-gateway-chassis rp-public hv1 + + check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ +@@ -5593,6 +5593,7 @@ ADD_NAMESPACES(server) + NS_CHECK_EXEC([server], [ip link set dev lo up]) + ADD_VETH(s1, server, br-ext, "172.16.1.50/24", "f0:00:00:01:02:05", \ + "172.16.1.1") ++NS_CHECK_EXEC([server], [ip addr add 1000::b/64 dev s1]) + + AT_CHECK([ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext]) + check ovn-nbctl lsp-add public public1 \ +@@ -5652,6 +5653,19 @@ sleep 5 + kill $(pidof tcpdump) + AT_CHECK([grep -qi bfd bfd.pcap],[1]) + ++uuid_v6=$(ovn-nbctl create bfd logical_port=rp-public dst_ip=\"1000::b\") ++check ovn-nbctl lr-route-add R1 2000::/64 1000::b ++route_uuid_v6=$(fetch_column nb:logical_router_static_route _uuid ip_prefix=\"2000::/64\") ++ovn-nbctl set logical_router_static_route $route_uuid_v6 bfd=$uuid_v6 ++check ovn-nbctl --wait=hv sync ++NS_CHECK_EXEC([server], [bfdd-beacon --listen=1000::b], [0]) ++NS_CHECK_EXEC([server], [bfdd-control allow 1000::a], [0], [dnl ++Allowing connections from 1000::a ++]) ++ ++wait_column "up" nb:bfd status logical_port=rp-public ++ovn-nbctl destroy bfd $uuid_v6 ++ + kill $(pidof ovn-controller) + + as ovn-sb diff --git a/SOURCES/0001-binding-Correctly-set-Port_Binding.up-for-container-.patch b/SOURCES/0001-binding-Correctly-set-Port_Binding.up-for-container-.patch new file mode 100644 index 0000000..ccf1785 --- /dev/null +++ b/SOURCES/0001-binding-Correctly-set-Port_Binding.up-for-container-.patch @@ -0,0 +1,280 @@ +From 36a57e7a388277d1e45f0cadd8e2601490a76b2d Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 3 Feb 2021 20:36:30 +0100 +Subject: [PATCH 1/4] binding: Correctly set Port_Binding.up for + container/virtual ports. + +For port bindings that are children of other port bindings (container +and virtual ports) set Port_Binding.up directly, when claimed, if their +parent bindings are already up. + +For non-VIF port bindings maintain compatibility with older versions +and set Port_Binding.up as soon as they are claimed. 
+ +Reported-by: Ben Pfaff +Fixes: 4d3cb42b076b ("binding: Set Logical_Switch_Port.up when all OVS flows are installed.") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit aae25e67b1ba86e34d670eb808de86f7ba66c3c0) + +Conflicts: + tests/ovn.at + +Change-Id: I17c5a4d0f89868190c81c7221caadf29959064b1 +--- + controller-vtep/binding.c | 3 +++ + controller/binding.c | 47 +++++++++++++++++++++++++++++++++++++++++------ + tests/ovn.at | 32 ++++++++++++++++++++++++++------ + 3 files changed, 70 insertions(+), 12 deletions(-) + +diff --git a/controller-vtep/binding.c b/controller-vtep/binding.c +index 8337715..d28a598 100644 +--- a/controller-vtep/binding.c ++++ b/controller-vtep/binding.c +@@ -109,7 +109,10 @@ update_pb_chassis(const struct sbrec_port_binding *port_binding_rec, + port_binding_rec->chassis->name, + chassis_rec->name); + } ++ ++ bool up = true; + sbrec_port_binding_set_chassis(port_binding_rec, chassis_rec); ++ sbrec_port_binding_set_up(port_binding_rec, &up, 1); + } + } + +diff --git a/controller/binding.c b/controller/binding.c +index d44f635..353debe 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -864,21 +864,52 @@ get_lport_type(const struct sbrec_port_binding *pb) + return LP_UNKNOWN; + } + ++/* For newly claimed ports, if 'notify_up' is 'false': ++ * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'. ++ * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for ++ * container and virtual ports). ++ * Otherwise request a notification to be sent when the OVS flows ++ * corresponding to 'pb' have been installed. 
++ */ ++static void ++claimed_lport_set_up(const struct sbrec_port_binding *pb, ++ const struct sbrec_port_binding *parent_pb, ++ const struct sbrec_chassis *chassis_rec, ++ bool notify_up) ++{ ++ if (!notify_up) { ++ bool up = true; ++ if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) { ++ sbrec_port_binding_set_up(pb, &up, 1); ++ } ++ return; ++ } ++ ++ if (pb->chassis != chassis_rec) { ++ binding_iface_bound_add(pb->logical_port); ++ } ++} ++ + /* Returns false if lport is not claimed due to 'sb_readonly'. + * Returns true otherwise. + */ + static bool + claim_lport(const struct sbrec_port_binding *pb, ++ const struct sbrec_port_binding *parent_pb, + const struct sbrec_chassis *chassis_rec, + const struct ovsrec_interface *iface_rec, +- bool sb_readonly, struct hmap *tracked_datapaths) ++ bool sb_readonly, bool notify_up, ++ struct hmap *tracked_datapaths) + { ++ if (!sb_readonly) { ++ claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up); ++ } ++ + if (pb->chassis != chassis_rec) { + if (sb_readonly) { + return false; + } + +- binding_iface_bound_add(pb->logical_port); + if (pb->chassis) { + VLOG_INFO("Changing chassis for lport %s from %s to %s.", + pb->logical_port, pb->chassis->name, +@@ -1041,8 +1072,12 @@ consider_vif_lport_(const struct sbrec_port_binding *pb, + if (lbinding_set) { + if (can_bind) { + /* We can claim the lport. */ +- if (!claim_lport(pb, b_ctx_in->chassis_rec, lbinding->iface, +- !b_ctx_in->ovnsb_idl_txn, ++ const struct sbrec_port_binding *parent_pb = ++ lbinding->parent ? 
lbinding->parent->pb : NULL; ++ ++ if (!claim_lport(pb, parent_pb, b_ctx_in->chassis_rec, ++ lbinding->iface, !b_ctx_in->ovnsb_idl_txn, ++ !lbinding->parent, + b_ctx_out->tracked_dp_bindings)){ + return false; + } +@@ -1246,8 +1281,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb, + b_ctx_out->tracked_dp_bindings); + + update_local_lport_ids(pb, b_ctx_out); +- return claim_lport(pb, b_ctx_in->chassis_rec, NULL, +- !b_ctx_in->ovnsb_idl_txn, ++ return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL, ++ !b_ctx_in->ovnsb_idl_txn, false, + b_ctx_out->tracked_dp_bindings); + } else if (pb->chassis == b_ctx_in->chassis_rec) { + return release_lport(pb, !b_ctx_in->ovnsb_idl_txn, +diff --git a/tests/ovn.at b/tests/ovn.at +index dfb94d2..1956f5c 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -8992,6 +8992,7 @@ AT_CHECK([test -z $bar2_zoneid]) + # Add back bar2 + ovn-nbctl lsp-add bar bar2 vm2 1 \ + -- lsp-set-addresses bar2 "f0:00:00:01:02:08 192.168.2.3" ++wait_for_ports_up + ovn-nbctl --wait=hv sync + + bar2_zoneid=$(as hv2 ovs-vsctl get bridge br-int external_ids:ct-zone-bar2) +@@ -14671,6 +14672,8 @@ OVS_WAIT_UNTIL( + logical_port=ls1-lp_ext1` + test "$chassis" = "$hv1_uuid"]) + ++wait_for_ports_up ls1-lp_ext1 ++ + # There should be DHCPv4/v6 OF flows for the ls1-lp_ext1 port in hv1 + (ovn-sbctl dump-flows lr0; ovn-sbctl dump-flows ls1) > sbflows + as hv1 ovs-ofctl dump-flows br-int > brintflows +@@ -14951,6 +14954,7 @@ OVS_WAIT_UNTIL( + [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=ls1-lp_ext1` + test "$chassis" = "$hv2_uuid"]) ++wait_for_ports_up ls1-lp_ext1 + + # There should be OF flows for DHCP4/v6 for the ls1-lp_ext1 port in hv2 + AT_CHECK([as hv2 ovs-ofctl dump-flows br-int | \ +@@ -15065,6 +15069,7 @@ OVS_WAIT_UNTIL( + [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=ls1-lp_ext1` + test "$chassis" = "$hv1_uuid"]) ++wait_for_ports_up ls1-lp_ext1 + + as hv1 + ovs-vsctl show +@@ 
-15145,6 +15150,7 @@ OVS_WAIT_UNTIL( + [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=ls1-lp_ext1` + test "$chassis" = "$hv3_uuid"]) ++wait_for_ports_up ls1-lp_ext1 + + as hv1 + ovs-vsctl show +@@ -15229,6 +15235,7 @@ OVS_WAIT_UNTIL( + [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=ls1-lp_ext1` + test "$chassis" = "$hv1_uuid"]) ++wait_for_ports_up ls1-lp_ext1 + + # There should be a flow in hv2 to drop traffic from ls1-lp_ext1 destined + # to router mac. +@@ -15246,6 +15253,7 @@ OVS_WAIT_UNTIL( + [chassis=`ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=ls1-lp_ext1` + test "$chassis" = "$hv2_uuid"]) ++wait_for_ports_up ls1-lp_ext1 + + as hv1 + OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +@@ -16604,12 +16612,10 @@ spa=$(ip_to_hex 10 0 0 10) + tpa=$(ip_to_hex 10 0 0 10) + send_garp 1 1 $eth_src $eth_dst $spa $tpa + +-OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns chassis find port_binding \ +-logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) +- +-AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ +-logical_port=sw0-vir) = xsw0-p1]) +- ++wait_row_count Port_Binding 1 logical_port=sw0-vir chassis=$hv1_ch_uuid ++check_row_count Port_Binding 1 logical_port=sw0-vir virtual_parent=sw0-p1 ++wait_for_ports_up sw0-vir ++check ovn-nbctl --wait=hv sync + + # There should be an arp resolve flow to resolve the virtual_ip with the + # sw0-p1's MAC. +@@ -16627,6 +16633,8 @@ ovn-sbctl clear port_binding $pb_uuid virtual_parent + OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns chassis find port_binding \ + logical_port=sw0-vir) = x]) + ++wait_row_count nb:Logical_Switch_Port 1 up=false name=sw0-vir ++ + # From sw0-p0 resend GARP for 10.0.0.10. hv1 should reclaim sw0-vir + # and sw0-p1 should be its virtual_parent. 
+ send_garp 1 1 $eth_src $eth_dst $spa $tpa +@@ -16637,6 +16645,8 @@ logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) + AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = xsw0-p1]) + ++wait_for_ports_up sw0-vir ++ + # From sw0-p3 send GARP for 10.0.0.10. hv1 should claim sw0-vir + # and sw0-p3 should be its virtual_parent. + eth_src=505400000005 +@@ -16651,6 +16661,7 @@ logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) + OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = xsw0-p3]) + ++wait_for_ports_up sw0-vir + + # There should be an arp resolve flow to resolve the virtual_ip with the + # sw0-p2's MAC. +@@ -16676,6 +16687,7 @@ logical_port=sw0-vir) = x$hv2_ch_uuid], [0], []) + AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = xsw0-p2]) + ++wait_for_ports_up sw0-vir + + # There should be an arp resolve flow to resolve the virtual_ip with the + # sw0-p3's MAC. +@@ -16701,6 +16713,8 @@ sleep 1 + AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = xsw0-p1]) + ++wait_for_ports_up sw0-vir ++ + ovn-sbctl dump-flows lr0 > lr0-flows5 + AT_CAPTURE_FILE([lr0-flows5]) + AT_CHECK([grep lr_in_arp_resolve lr0-flows5 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl +@@ -16717,6 +16731,8 @@ sleep 1 + AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = x]) + ++wait_row_count nb:Logical_Switch_Port 1 up=false name=sw0-vir ++ + # Since the sw0-vir is not claimed by any chassis, eth.dst should be set to + # zero if the ip4.dst is the virtual ip. 
+ ovn-sbctl dump-flows lr0 > lr0-flows6 +@@ -16740,6 +16756,8 @@ sleep 1 + AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = xsw0-p2]) + ++wait_for_ports_up sw0-vir ++ + ovn-sbctl dump-flows lr0 > lr0-flows7 + AT_CAPTURE_FILE([lr0-flows7]) + AT_CHECK([grep lr_in_arp_resolve lr0-flows7 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl +@@ -16754,6 +16772,8 @@ logical_port=sw0-vir) = x], [0], []) + AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ + logical_port=sw0-vir) = x]) + ++wait_row_count nb:Logical_Switch_Port 1 up=false name=sw0-vir ++ + # Clear virtual_ip column of sw0-vir. There should be no bind_vport flows. + ovn-nbctl --wait=hv remove logical_switch_port sw0-vir options virtual-ip + +-- +1.8.3.1 + diff --git a/SOURCES/0001-binding-Do-not-clear-container-lbinding-pb-when-pare.patch b/SOURCES/0001-binding-Do-not-clear-container-lbinding-pb-when-pare.patch new file mode 100644 index 0000000..2ac7a05 --- /dev/null +++ b/SOURCES/0001-binding-Do-not-clear-container-lbinding-pb-when-pare.patch @@ -0,0 +1,61 @@ +From 0ec31292fc29d2c111927382b13ea8af0499f6ac Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 6 Jan 2021 11:53:14 +0100 +Subject: [PATCH] binding: Do not clear container lbinding->pb when parent is + deleted. + +When a parent Port_Binding is deleted we shouldn't clear the children's +'pb' field. Container port bindings have their own Port_Binding SB +record so the child_lbinding->pb field should be cleared only when +their corresponding SB record is deleted. + +Without this fix when a parent Port_Binding "remove" followed by "add" +operations are received in the same iteration in ovn-controller, +consider_container_lport() can be called with "pb == NULL" causing a +crash. 
+ +Fixes: 354bdba51abf ("ovn-controller: I-P for SB port binding and OVS interface in runtime_data.") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from master commit d3245f69dd6ec613ceb193f728946f7e3b9b3de3) +--- + controller/binding.c | 3 +-- + tests/ovn.at | 7 +++++++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/controller/binding.c b/controller/binding.c +index cb60c5d..e632203 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -958,8 +958,7 @@ release_local_binding_children(const struct sbrec_chassis *chassis_rec, + } + } + +- /* Clear the local bindings' 'pb' and 'iface'. */ +- l->pb = NULL; ++ /* Clear the local bindings' 'iface'. */ + l->iface = NULL; + } + +diff --git a/tests/ovn.at b/tests/ovn.at +index 8bcfcdf..ce6db86 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -9126,6 +9126,13 @@ OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up vm1)]) + OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up foo1)]) + OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up bar1)]) + ++# Move VM1 to a new logical switch. 
++ovn-nbctl ls-add mgmt2 ++ovn-nbctl lsp-del vm1 -- lsp-add mgmt2 vm1 ++OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up vm1)]) ++OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up foo1)]) ++OVS_WAIT_UNTIL([test xup = x$(ovn-nbctl lsp-get-up bar1)]) ++ + as hv1 ovs-vsctl del-port vm1 + OVS_WAIT_UNTIL([test xdown = x$(ovn-nbctl lsp-get-up vm1)]) + OVS_WAIT_UNTIL([test xdown = x$(ovn-nbctl lsp-get-up foo1)]) +-- +1.8.3.1 + diff --git a/SOURCES/0001-binding-Fix-container-port-removal-from-local-bindin.patch b/SOURCES/0001-binding-Fix-container-port-removal-from-local-bindin.patch new file mode 100644 index 0000000..af9d3cb --- /dev/null +++ b/SOURCES/0001-binding-Fix-container-port-removal-from-local-bindin.patch @@ -0,0 +1,150 @@ +From 44955fb2395677c9d9bb1afa3985b24317c84431 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Mon, 18 Jan 2021 17:50:23 +0100 +Subject: [PATCH 1/2] binding: Fix container port removal from local bindings. + +When the Port_Binding associated to a container port is removed make +sure we also remove it from the parent's 'children' shash. Container +ports don't have any VIFs associated so it's safe to destroy the +container port local binding when the SB.Port_Binding is deleted. 
+ +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from master commit 68cf9fdceba80ce0c03e3ddb3e0a5531f248fa04) + +Change-Id: I65f4bd461f3f94ca90dbcc0646c0037c301c71a1 +--- + controller/binding.c | 18 +++++++++++++++++- + controller/binding.h | 1 + + tests/ovn.at | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 71 insertions(+), 1 deletion(-) + +diff --git a/controller/binding.c b/controller/binding.c +index e632203..3512a1d 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -688,6 +688,7 @@ local_binding_add_child(struct local_binding *lbinding, + struct local_binding *child) + { + local_binding_add(&lbinding->children, child); ++ child->parent = lbinding; + } + + static struct local_binding * +@@ -697,6 +698,13 @@ local_binding_find_child(struct local_binding *lbinding, + return local_binding_find(&lbinding->children, child_name); + } + ++static void ++local_binding_delete_child(struct local_binding *lbinding, ++ struct local_binding *child) ++{ ++ shash_find_and_delete(&lbinding->children, child->name); ++} ++ + static bool + is_lport_vif(const struct sbrec_port_binding *pb) + { +@@ -2062,6 +2070,14 @@ handle_deleted_vif_lport(const struct sbrec_port_binding *pb, + * when the interface change happens. */ + if (is_lport_container(pb)) { + remove_local_lports(pb->logical_port, b_ctx_out); ++ ++ /* If the container port is removed we should also remove it from ++ * its parent's children set. 
++ */ ++ if (lbinding->parent) { ++ local_binding_delete_child(lbinding->parent, lbinding); ++ } ++ local_binding_destroy(lbinding); + } + + handle_deleted_lport(pb, b_ctx_in, b_ctx_out); +@@ -2147,7 +2163,7 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, + enum en_lport_type lport_type = get_lport_type(pb); + if (lport_type == LP_VIF || lport_type == LP_VIRTUAL) { + handled = handle_deleted_vif_lport(pb, lport_type, b_ctx_in, +- b_ctx_out); ++ b_ctx_out); + } else { + handle_deleted_lport(pb, b_ctx_in, b_ctx_out); + } +diff --git a/controller/binding.h b/controller/binding.h +index c974056..2885971 100644 +--- a/controller/binding.h ++++ b/controller/binding.h +@@ -100,6 +100,7 @@ struct local_binding { + + /* shash of 'struct local_binding' representing children. */ + struct shash children; ++ struct local_binding *parent; + }; + + static inline struct local_binding * +diff --git a/tests/ovn.at b/tests/ovn.at +index 27cb2e4..2cdc036 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -22144,6 +22144,59 @@ AT_CHECK_UNQUOTED([grep -c "output:4" offlows_table65_2.txt], [0], [dnl + OVN_CLEANUP([hv1]) + AT_CLEANUP + ++AT_SETUP([ovn -- Container port Incremental Processing]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.10 ++ ++as hv1 ++ovs-vsctl \ ++ -- add-port br-int vif1 \ ++ -- set Interface vif1 external_ids:iface-id=lsp1 \ ++ ofport-request=1 ++ ++check ovn-nbctl ls-add ls1 \ ++ -- ls-add ls2 \ ++ -- lsp-add ls1 lsp1 \ ++ -- lsp-add ls2 lsp-cont1 lsp1 1 ++check ovn-nbctl --wait=hv sync ++ ++# Wait for ports to be bound. 
++wait_row_count Chassis 1 name=hv1 ++ch=$(fetch_column Chassis _uuid name=hv1) ++wait_row_count Port_Binding 1 logical_port=lsp1 chassis=$ch ++wait_row_count Port_Binding 1 logical_port=lsp-cont1 chassis=$ch ++ ++AS_BOX([delete OVS VIF and OVN container port]) ++as hv1 ovn-appctl -t ovn-controller debug/pause ++as hv1 ovs-vsctl del-port vif1 ++ ++check ovn-nbctl --wait=sb lsp-del lsp-cont1 ++as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++check_row_count Port_Binding 1 logical_port=lsp1 chassis="[[]]" ++ ++AS_BOX([readd OVS VIF]) ++as hv1 ++ovs-vsctl \ ++ -- add-port br-int vif1 \ ++ -- set Interface vif1 external_ids:iface-id=lsp1 \ ++ ofport-request=1 ++wait_row_count Port_Binding 1 logical_port=lsp1 chassis=$ch ++ ++AS_BOX([readd OVN container port]) ++check ovn-nbctl lsp-add ls2 lsp-cont1 lsp1 1 ++check ovn-nbctl --wait=hv sync ++check_row_count Port_Binding 1 logical_port=lsp-cont1 chassis=$ch ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ + # Test dropping traffic destined to router owned IPs. + AT_SETUP([ovn -- gateway router drop traffic for own IPs]) + ovn_start +-- +1.8.3.1 + diff --git a/SOURCES/0001-controller-fix-pkt_marking-with-IP-buffering.patch b/SOURCES/0001-controller-fix-pkt_marking-with-IP-buffering.patch new file mode 100644 index 0000000..7bdf589 --- /dev/null +++ b/SOURCES/0001-controller-fix-pkt_marking-with-IP-buffering.patch @@ -0,0 +1,59 @@ +From 5b75b36198b1cdf66aa0bee5a0a73f1e591af1b2 Mon Sep 17 00:00:00 2001 +Message-Id: <5b75b36198b1cdf66aa0bee5a0a73f1e591af1b2.1611831762.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Mon, 25 Jan 2021 14:28:48 +0100 +Subject: [PATCH] controller: fix pkt_marking with IP buffering + +Reload pkt_marking metadata for cloned packets during ARP/ND address +resolution. 
+ +https://bugzilla.redhat.com/show_bug.cgi?id=1857106 + +Tested-by: Jianlin Shi +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/pinctrl.c | 5 +++++ + tests/ovn.at | 10 ++++++++++ + 2 files changed, 15 insertions(+) + +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -1398,6 +1398,11 @@ buffered_push_packet(struct buffered_pac + ofpbuf_init(&bi->ofpacts, 4096); + + reload_metadata(&bi->ofpacts, md); ++ /* reload pkt_mark field */ ++ const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); ++ union mf_value pkt_mark_value; ++ mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); ++ ofpact_put_set_field(&bi->ofpacts, pkt_mark_field, &pkt_mark_value, NULL); + bi->ofp_port = md->flow.in_port.ofp_port; + + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts); +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -15886,6 +15886,14 @@ ovn-nbctl --wait=hv sync + ovn-sbctl dump-flows > sbflows2 + AT_CAPTURE_FILE([sbflows2]) + ++# create a route policy for pkt marking ++check ovn-nbctl lr-policy-add lr0 2000 "ip4.src == 192.168.1.3" allow ++policy=$(fetch_column nb:Logical_Router_Policy _uuid priority=2000) ++check ovn-nbctl set logical_router_policy $policy options:pkt_mark=100 ++as hv2 ++# add a flow in egress pipeline to check pkt marking ++ovs-ofctl --protocols=OpenFlow13 add-flow br-int "table=32,priority=200,ip,nw_src=172.16.1.2,pkt_mark=0x64 actions=resubmit(,33)" ++ + dst_ip=$(ip_to_hex 172 16 2 10) + fip_ip=$(ip_to_hex 172 16 1 2) + src_ip=$(ip_to_hex 192 168 1 3) +@@ -15896,6 +15904,8 @@ echo $(get_arp_req f00000010204 $fip_ip + send_arp_reply 2 1 $gw_router_mac f00000010204 $gw_router_ip $fip_ip + echo "${gw_router_mac}f0000001020408004500001c00004000fe0121b4${fip_ip}${dst_ip}${data}" >> expected + ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=32 | grep pkt_mark=0x64 | grep -q n_packets=1],[0]) ++ + OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) + + OVN_CLEANUP([hv1],[hv2]) diff --git 
a/SOURCES/0001-northd-Fix-ACL-fair-log-meters-for-Port_Group-ACLs.patch b/SOURCES/0001-northd-Fix-ACL-fair-log-meters-for-Port_Group-ACLs.patch new file mode 100644 index 0000000..e25335c --- /dev/null +++ b/SOURCES/0001-northd-Fix-ACL-fair-log-meters-for-Port_Group-ACLs.patch @@ -0,0 +1,269 @@ +From d2b69af321ad8064d42aad2fd3d15857334e2d63 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Fri, 15 Jan 2021 14:41:19 +0100 +Subject: [PATCH] northd: Fix ACL fair log meters for Port_Group ACLs. + +Commit 880dca99eaf7 added support for fair meters but didn't cover the +case when an ACL is configured on a port group instead of a logical +switch. + +Iterate over PG ACLs too when syncing fair meters to the Southbound +database. Due to the fact that a meter might be used for ACLs that are +applied on multiple logical datapaths (through port groups) we also need +to change the logic of deleting stale SB Meter records. + +Fixes: 880dca99eaf7 ("northd: Enhance the implementation of ACL log meters (pre-ddlog merge).") +Reported-by: Dmitry Yusupov +Signed-off-by: Dumitru Ceara +Acked-by: Flavio Fernandes +Signed-off-by: Numan Siddique +(cherry picked from master commit bf4f75f90c3309dbcfac8e098a2c1ff2d822e77d) + +Change-Id: If6f19df6fe0b84abc2fbb7356bf59c2d5eb496e1 +--- + northd/ovn-northd.c | 61 ++++++++++++++++++++++++++++++++++++++++------------- + tests/ovn-northd.at | 42 ++++++++++++++++++++++++------------ + 2 files changed, 74 insertions(+), 29 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index fa2bd73..49afc2f 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -12250,17 +12250,20 @@ static void + sync_meters_iterate_nb_meter(struct northd_context *ctx, + const char *meter_name, + const struct nbrec_meter *nb_meter, +- struct shash *sb_meters) ++ struct shash *sb_meters, ++ struct sset *used_sb_meters) + { ++ const struct sbrec_meter *sb_meter; + bool new_sb_meter = false; + +- const struct sbrec_meter *sb_meter = 
shash_find_and_delete(sb_meters, +- meter_name); ++ sb_meter = shash_find_data(sb_meters, meter_name); + if (!sb_meter) { + sb_meter = sbrec_meter_insert(ctx->ovnsb_txn); + sbrec_meter_set_name(sb_meter, meter_name); ++ shash_add(sb_meters, sb_meter->name, sb_meter); + new_sb_meter = true; + } ++ sset_add(used_sb_meters, meter_name); + + if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) { + struct sbrec_meter_band **sb_bands; +@@ -12282,6 +12285,24 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx, + sbrec_meter_set_unit(sb_meter, nb_meter->unit); + } + ++static void ++sync_acl_fair_meter(struct northd_context *ctx, struct shash *meter_groups, ++ const struct nbrec_acl *acl, struct shash *sb_meters, ++ struct sset *used_sb_meters) ++{ ++ const struct nbrec_meter *nb_meter = ++ fair_meter_lookup_by_name(meter_groups, acl->meter); ++ ++ if (!nb_meter) { ++ return; ++ } ++ ++ char *meter_name = alloc_acl_log_unique_meter_name(acl); ++ sync_meters_iterate_nb_meter(ctx, meter_name, nb_meter, sb_meters, ++ used_sb_meters); ++ free(meter_name); ++} ++ + /* Each entry in the Meter and Meter_Band tables in OVN_Northbound have + * a corresponding entries in the Meter and Meter_Band tables in + * OVN_Southbound. 
Additionally, ACL logs that use fair meters have +@@ -12289,9 +12310,10 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx, + */ + static void + sync_meters(struct northd_context *ctx, struct hmap *datapaths, +- struct shash *meter_groups) ++ struct shash *meter_groups, struct hmap *port_groups) + { + struct shash sb_meters = SHASH_INITIALIZER(&sb_meters); ++ struct sset used_sb_meters = SSET_INITIALIZER(&used_sb_meters); + + const struct sbrec_meter *sb_meter; + SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) { +@@ -12301,7 +12323,7 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths, + const struct nbrec_meter *nb_meter; + NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) { + sync_meters_iterate_nb_meter(ctx, nb_meter->name, nb_meter, +- &sb_meters); ++ &sb_meters, &used_sb_meters); + } + + /* +@@ -12315,19 +12337,28 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths, + continue; + } + for (size_t i = 0; i < od->nbs->n_acls; i++) { +- struct nbrec_acl *acl = od->nbs->acls[i]; +- nb_meter = fair_meter_lookup_by_name(meter_groups, acl->meter); +- if (!nb_meter) { +- continue; ++ sync_acl_fair_meter(ctx, meter_groups, od->nbs->acls[i], ++ &sb_meters, &used_sb_meters); ++ } ++ struct ovn_port_group *pg; ++ HMAP_FOR_EACH (pg, key_node, port_groups) { ++ if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) { ++ for (size_t i = 0; i < pg->nb_pg->n_acls; i++) { ++ sync_acl_fair_meter(ctx, meter_groups, pg->nb_pg->acls[i], ++ &sb_meters, &used_sb_meters); ++ } + } +- +- char *meter_name = alloc_acl_log_unique_meter_name(acl); +- sync_meters_iterate_nb_meter(ctx, meter_name, nb_meter, +- &sb_meters); +- free(meter_name); + } + } + ++ const char *used_meter; ++ const char *used_meter_next; ++ SSET_FOR_EACH_SAFE (used_meter, used_meter_next, &used_sb_meters) { ++ shash_find_and_delete(&sb_meters, used_meter); ++ sset_delete(&used_sb_meters, SSET_NODE_FROM_NAME(used_meter)); ++ } ++ sset_destroy(&used_sb_meters); ++ + struct shash_node 
*node, *next; + SHASH_FOR_EACH_SAFE (node, next, &sb_meters) { + sbrec_meter_delete(node->data); +@@ -12825,7 +12856,7 @@ ovnnb_db_run(struct northd_context *ctx, + + sync_address_sets(ctx); + sync_port_groups(ctx, &port_groups); +- sync_meters(ctx, datapaths, &meter_groups); ++ sync_meters(ctx, datapaths, &meter_groups, &port_groups); + sync_dns_entries(ctx, datapaths); + + struct ovn_northd_lb *lb; +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index 91eb9a3..df03b6e 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -1843,20 +1843,25 @@ AT_KEYWORDS([acl log meter fair]) + ovn_start + + check ovn-nbctl ls-add sw0 ++check ovn-nbctl ls-add sw1 + check ovn-nbctl lsp-add sw0 sw0-p1 -- lsp-set-addresses sw0-p1 "50:54:00:00:00:01 10.0.0.11" + check ovn-nbctl lsp-add sw0 sw0-p2 -- lsp-set-addresses sw0-p2 "50:54:00:00:00:02 10.0.0.12" +-check ovn-nbctl lsp-add sw0 sw0-p3 -- lsp-set-addresses sw0-p3 "50:54:00:00:00:03 10.0.0.13" ++check ovn-nbctl lsp-add sw1 sw1-p3 -- lsp-set-addresses sw1-p3 "50:54:00:00:00:03 10.0.0.13" ++check ovn-nbctl pg-add pg0 sw0-p1 sw0-p2 sw1-p3 + + check ovn-nbctl meter-add meter_me drop 1 pktps + nb_meter_uuid=$(fetch_column nb:Meter _uuid name=meter_me) + + check ovn-nbctl acl-add sw0 to-lport 1002 'outport == "sw0-p1" && ip4.src == 10.0.0.12' allow + check ovn-nbctl acl-add sw0 to-lport 1002 'outport == "sw0-p1" && ip4.src == 10.0.0.13' allow ++check ovn-nbctl acl-add pg0 to-lport 1002 'outport == "pg0" && ip4.src == 10.0.0.11' drop + + acl1=$(ovn-nbctl --bare --column _uuid,match find acl | grep -B1 '10.0.0.12' | head -1) + acl2=$(ovn-nbctl --bare --column _uuid,match find acl | grep -B1 '10.0.0.13' | head -1) ++acl3=$(ovn-nbctl --bare --column _uuid,match find acl | grep -B1 '10.0.0.11' | head -1) + check ovn-nbctl set acl $acl1 log=true severity=alert meter=meter_me name=acl_one + check ovn-nbctl set acl $acl2 log=true severity=info meter=meter_me name=acl_two ++check ovn-nbctl set acl $acl3 log=true severity=info 
meter=meter_me name=acl_three + check ovn-nbctl --wait=sb sync + + check_row_count nb:meter 1 +@@ -1865,8 +1870,9 @@ check_column meter_me nb:meter name + check_acl_lflow() { + acl_log_name=$1 + meter_name=$2 ++ ls=$3 + # echo checking that logical flow for acl log $acl_log_name has $meter_name +- AT_CHECK([ovn-sbctl lflow-list | grep ls_out_acl | \ ++ AT_CHECK([ovn-sbctl lflow-list $ls | grep ls_out_acl | \ + grep "\"${acl_log_name}\"" | \ + grep -c "meter=\"${meter_name}\""], [0], [1 + ]) +@@ -1882,55 +1888,63 @@ check_meter_by_name() { + + # Make sure 'fair' value properly affects the Meters in SB + check_meter_by_name meter_me +-check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} ++check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} meter_me__${acl3} + + check ovn-nbctl --wait=sb set Meter $nb_meter_uuid fair=true +-check_meter_by_name meter_me meter_me__${acl1} meter_me__${acl2} ++check_meter_by_name meter_me meter_me__${acl1} meter_me__${acl2} meter_me__${acl3} + + check ovn-nbctl --wait=sb set Meter $nb_meter_uuid fair=false + check_meter_by_name meter_me +-check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} ++check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} meter_me__${acl3} + + check ovn-nbctl --wait=sb set Meter $nb_meter_uuid fair=true +-check_meter_by_name meter_me meter_me__${acl1} meter_me__${acl2} ++check_meter_by_name meter_me meter_me__${acl1} meter_me__${acl2} meter_me__${acl3} + + # Change template meter and make sure that is reflected on acl meters as well + template_band=$(fetch_column nb:meter bands name=meter_me) + check ovn-nbctl --wait=sb set meter_band $template_band rate=123 + # Make sure that every Meter_Band has the right rate. (ovn-northd +-# creates 3 identical Meter_Band rows, all identical; ovn-northd-ddlog ++# creates 4 identical Meter_Band rows, all identical; ovn-northd-ddlog + # creates just 1. It doesn't matter, they work just as well.) 
+ n_meter_bands=$(count_rows meter_band) +-AT_FAIL_IF([test "$n_meter_bands" != 1 && test "$n_meter_bands" != 3]) ++AT_FAIL_IF([test "$n_meter_bands" != 1 && test "$n_meter_bands" != 4]) + check_row_count meter_band $n_meter_bands rate=123 + + # Check meter in logical flows for acl logs +-check_acl_lflow acl_one meter_me__${acl1} +-check_acl_lflow acl_two meter_me__${acl2} ++check_acl_lflow acl_one meter_me__${acl1} sw0 ++check_acl_lflow acl_two meter_me__${acl2} sw0 ++check_acl_lflow acl_three meter_me__${acl3} sw0 ++check_acl_lflow acl_three meter_me__${acl3} sw1 + + # Stop using meter for acl1 + check ovn-nbctl --wait=sb clear acl $acl1 meter + check_meter_by_name meter_me meter_me__${acl2} + check_meter_by_name NOT meter_me__${acl1} +-check_acl_lflow acl_two meter_me__${acl2} ++check_acl_lflow acl_two meter_me__${acl2} sw0 ++check_acl_lflow acl_three meter_me__${acl3} sw0 ++check_acl_lflow acl_three meter_me__${acl3} sw1 + + # Remove template Meter should remove all others as well + check ovn-nbctl --wait=sb meter-del meter_me + check_row_count meter 0 + # Check that logical flow remains but uses non-unique meter since fair + # attribute is lost by the removal of the Meter row. 
+-check_acl_lflow acl_two meter_me ++check_acl_lflow acl_two meter_me sw0 ++check_acl_lflow acl_three meter_me sw0 ++check_acl_lflow acl_three meter_me sw1 + + # Re-add template meter and make sure acl2's meter is back in sb + check ovn-nbctl --wait=sb --fair meter-add meter_me drop 1 pktps + check_meter_by_name meter_me meter_me__${acl2} + check_meter_by_name NOT meter_me__${acl1} +-check_acl_lflow acl_two meter_me__${acl2} ++check_acl_lflow acl_two meter_me__${acl2} sw0 ++check_acl_lflow acl_three meter_me__${acl3} sw0 ++check_acl_lflow acl_three meter_me__${acl3} sw1 + + # Remove acl2 + sw0=$(fetch_column nb:logical_switch _uuid name=sw0) + check ovn-nbctl --wait=sb remove logical_switch $sw0 acls $acl2 +-check_meter_by_name meter_me ++check_meter_by_name meter_me meter_me__${acl3} + check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} + + AT_CLEANUP +-- +1.8.3.1 + diff --git a/SOURCES/0001-northd-Fix-duplicate-logical-port-detection.patch b/SOURCES/0001-northd-Fix-duplicate-logical-port-detection.patch new file mode 100644 index 0000000..baec6e8 --- /dev/null +++ b/SOURCES/0001-northd-Fix-duplicate-logical-port-detection.patch @@ -0,0 +1,182 @@ +From 46a4e3bb3a6d01c96721761a0e03d093583ab1cc Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Thu, 21 Jan 2021 13:34:11 +0100 +Subject: [PATCH] northd: Fix duplicate logical port detection. + +When reconciling SB Port_Bindings and NB Logical_Switch_Ports or +Logical_Router_Ports make sure we properly detect ports that are +incorrectly attached to multiple logical datapaths. 
+ +Reported-at: https://bugzilla.redhat.com/1918582 +Reported-by: Jianlin Shi +Fixes: 8bf9075968ac ("ovn-northd: Fix tunnel_key allocation for SB Port_Bindings.") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from master commit 7b404e68b5b8ea7168b25cb60abb79b5696bcc02) + +Change-Id: I09d958cbfe676aad3b370e2a9fec404119c4e6d8 +--- + northd/ovn-northd.c | 64 ++++++++++++++++++++++++++++++++++------------------- + tests/ovn-northd.at | 45 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 86 insertions(+), 23 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 49afc2f..f11894a 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -1546,17 +1546,38 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port) + } + } + ++/* Returns the ovn_port that matches 'name'. If 'prefer_bound' is true and ++ * multiple ports share the same name, gives precedence to ports bound to ++ * an ovn_datapath. ++ */ + static struct ovn_port * +-ovn_port_find(const struct hmap *ports, const char *name) ++ovn_port_find__(const struct hmap *ports, const char *name, ++ bool prefer_bound) + { ++ struct ovn_port *matched_op = NULL; + struct ovn_port *op; + + HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) { + if (!strcmp(op->key, name)) { +- return op; ++ matched_op = op; ++ if (!prefer_bound || op->od) { ++ return op; ++ } + } + } +- return NULL; ++ return matched_op; ++} ++ ++static struct ovn_port * ++ovn_port_find(const struct hmap *ports, const char *name) ++{ ++ return ovn_port_find__(ports, name, false); ++} ++ ++static struct ovn_port * ++ovn_port_find_bound(const struct hmap *ports, const char *name) ++{ ++ return ovn_port_find__(ports, name, true); + } + + /* Returns true if the logical switch port 'enabled' column is empty or +@@ -2339,15 +2360,13 @@ join_logical_ports(struct northd_context *ctx, + for (size_t i = 0; i < od->nbs->n_ports; i++) { + const struct nbrec_logical_switch_port
*nbsp + = od->nbs->ports[i]; +- struct ovn_port *op = ovn_port_find(ports, nbsp->name); +- if (op && op->sb->datapath == od->sb) { +- if (op->nbsp || op->nbrp) { +- static struct vlog_rate_limit rl +- = VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "duplicate logical port %s", +- nbsp->name); +- continue; +- } ++ struct ovn_port *op = ovn_port_find_bound(ports, nbsp->name); ++ if (op && (op->od || op->nbsp || op->nbrp)) { ++ static struct vlog_rate_limit rl ++ = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "duplicate logical port %s", nbsp->name); ++ continue; ++ } else if (op && (!op->sb || op->sb->datapath == od->sb)) { + ovn_port_set_nb(op, nbsp, NULL); + ovs_list_remove(&op->list); + +@@ -2438,16 +2457,15 @@ join_logical_ports(struct northd_context *ctx, + continue; + } + +- struct ovn_port *op = ovn_port_find(ports, nbrp->name); +- if (op && op->sb->datapath == od->sb) { +- if (op->nbsp || op->nbrp) { +- static struct vlog_rate_limit rl +- = VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "duplicate logical router port %s", +- nbrp->name); +- destroy_lport_addresses(&lrp_networks); +- continue; +- } ++ struct ovn_port *op = ovn_port_find_bound(ports, nbrp->name); ++ if (op && (op->od || op->nbsp || op->nbrp)) { ++ static struct vlog_rate_limit rl ++ = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "duplicate logical router port %s", ++ nbrp->name); ++ destroy_lport_addresses(&lrp_networks); ++ continue; ++ } else if (op && (!op->sb || op->sb->datapath == od->sb)) { + ovn_port_set_nb(op, NULL, nbrp); + ovs_list_remove(&op->list); + ovs_list_push_back(both, &op->list); +@@ -2490,7 +2508,7 @@ join_logical_ports(struct northd_context *ctx, + char *redirect_name = + ovn_chassis_redirect_name(nbrp->name); + struct ovn_port *crp = ovn_port_find(ports, redirect_name); +- if (crp && crp->sb->datapath == od->sb) { ++ if (crp && crp->sb && crp->sb->datapath == od->sb) { + crp->derived = true; + ovn_port_set_nb(crp, NULL, nbrp); + ovs_list_remove(&crp->list); 
+diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index df03b6e..d22cad8 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2399,3 +2399,48 @@ ovn-nbctl destroy bfd $uuid + check_row_count bfd 2 + + AT_CLEANUP ++ ++AT_SETUP([ovn -- check LSP attached to multiple LS]) ++ovn_start ++ ++check ovn-nbctl ls-add ls1 \ ++ -- ls-add ls2 \ ++ -- lsp-add ls1 p1 ++check ovn-nbctl --wait=sb sync ++ ++uuid=$(fetch_column nb:Logical_Switch_Port _uuid name=p1) ++check ovn-nbctl set Logical_Switch ls2 ports=$uuid ++check ovn-nbctl --wait=sb sync ++ ++AT_CHECK([grep -qE 'duplicate logical port p1' northd/ovn-northd.log], [0]) ++ ++AT_CLEANUP ++ ++AT_SETUP([ovn -- check LRP attached to multiple LR]) ++ovn_start ++ ++check ovn-nbctl lr-add lr1 \ ++ -- lr-add lr2 \ ++ -- lrp-add lr1 p1 00:00:00:00:00:01 10.0.0.1/24 ++check ovn-nbctl --wait=sb sync ++ ++uuid=$(fetch_column nb:Logical_Router_Port _uuid name=p1) ++check ovn-nbctl set Logical_Router lr2 ports=$uuid ++check ovn-nbctl --wait=sb sync ++ ++AT_CHECK([grep -qE 'duplicate logical router port p1' northd/ovn-northd.log], [0]) ++ ++AT_CLEANUP ++ ++AT_SETUP([ovn -- check duplicate LSP/LRP]) ++ovn_start ++ ++check ovn-nbctl ls-add ls \ ++ -- lsp-add ls p1 \ ++ -- lr-add lr \ ++ -- lrp-add lr p1 00:00:00:00:00:01 10.0.0.1/24 ++check ovn-nbctl --wait=sb sync ++ ++AT_CHECK([grep -qE 'duplicate logical.*port p1' northd/ovn-northd.log], [0]) ++ ++AT_CLEANUP +-- +1.8.3.1 + diff --git a/SOURCES/0001-northd-Skip-matching-on-ct-flags-for-stateless-confi.patch b/SOURCES/0001-northd-Skip-matching-on-ct-flags-for-stateless-confi.patch new file mode 100644 index 0000000..f83a390 --- /dev/null +++ b/SOURCES/0001-northd-Skip-matching-on-ct-flags-for-stateless-confi.patch @@ -0,0 +1,265 @@ +From 5336b5cb342b8f81115299540f3268f734a6d009 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 10 Feb 2021 12:20:17 +0100 +Subject: [PATCH] northd: Skip matching on ct flags for stateless + configurations. 
+ +If no load balancers or "allow-related" ACLs are configured on a logical +switch, no packets will be sent to conntrack in the logical switch +pipeline and ACL flows in tables ls_in/out_acl will not match on +conntrack state. In this case there's no need to try to set ACL hints +in tables ls_in/out_acl_hint. + +Furthermore, setting the hints translates to always generating flows +that match on ct.state. Depending on the underlying hardware such flows +may not be offloadable inducing a hit in performance even when no +conntrack recirculations are required. + +To avoid iterating through all configured ACLs and load balancers +multiple times, we now store two new fields in the 'ovn_datapath' +structure: +- has_stateful_acl +- has_lb_vip + +Also, rename the 'has_lb_vip()' and 'has_stateful_acl()' functions, +prefixing them with 'ls_' to match other helper function names. + +Fixes: 209ea46bbf9d ("ovn-northd: Reduce number of flows generated for stateful ACLs.") +Reported-by: Haresh Khandelwal +Reported-at: https://bugzilla.redhat.com/1927211 +Signed-off-by: Dumitru Ceara +Signed-off-by: Mark Michelson +Acked-by: Mark Michelson +--- + northd/ovn-northd.8.xml | 3 +- + northd/ovn-northd.c | 34 +++++++++++------- + tests/ovn-northd.at | 77 +++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 101 insertions(+), 13 deletions(-) + +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index 70065a36d..2f8b4e8c3 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -415,7 +415,8 @@ +

+ This table consists of logical flows that set hints + (reg0 bits) to be used in the next stage, in the ACL +- processing table. Multiple hints can be set for the same packet. ++ processing table, if stateful ACLs or load balancers are configured. ++ Multiple hints can be set for the same packet. + The possible hints are: +

+
    +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index db6572a62..b85e6e78a 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -597,6 +597,8 @@ struct ovn_datapath { + struct hmap port_tnlids; + uint32_t port_key_hint; + ++ bool has_stateful_acl; ++ bool has_lb_vip; + bool has_unknown; + + /* IPAM data. */ +@@ -635,6 +637,9 @@ struct ovn_datapath { + struct hmap nb_pgs; + }; + ++static bool ls_has_stateful_acl(struct ovn_datapath *od); ++static bool ls_has_lb_vip(struct ovn_datapath *od); ++ + /* Contains a NAT entry with the external addresses pre-parsed. */ + struct ovn_nat { + const struct nbrec_nat *nb; +@@ -4635,7 +4640,7 @@ ovn_ls_port_group_destroy(struct hmap *nb_pgs) + } + + static bool +-has_stateful_acl(struct ovn_datapath *od) ++ls_has_stateful_acl(struct ovn_datapath *od) + { + for (size_t i = 0; i < od->nbs->n_acls; i++) { + struct nbrec_acl *acl = od->nbs->acls[i]; +@@ -4814,8 +4819,6 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op, + static void + build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) + { +- bool has_stateful = has_stateful_acl(od); +- + /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are + * allowed by default. */ + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;"); +@@ -4830,7 +4833,7 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) + /* If there are any stateful ACL rules in this datapath, we must + * send all IP packets through the conntrack action, which handles + * defragmentation, in order to match L4 headers. 
*/ +- if (has_stateful) { ++ if (od->has_stateful_acl) { + for (size_t i = 0; i < od->n_router_ports; i++) { + skip_port_from_conntrack(od, od->router_ports[i], + S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL, +@@ -4933,7 +4936,7 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, + } + + static bool +-has_lb_vip(struct ovn_datapath *od) ++ls_has_lb_vip(struct ovn_datapath *od) + { + for (int i = 0; i < od->nbs->n_load_balancer; i++) { + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; +@@ -5076,6 +5079,13 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows) + for (size_t i = 0; i < ARRAY_SIZE(stages); i++) { + enum ovn_stage stage = stages[i]; + ++ /* In any case, advance to the next stage. */ ++ ovn_lflow_add(lflows, od, stage, 0, "1", "next;"); ++ ++ if (!od->has_stateful_acl && !od->has_lb_vip) { ++ continue; ++ } ++ + /* New, not already established connections, may hit either allow + * or drop ACLs. For allow ACLs, the connection must also be committed + * to conntrack so we set REGBIT_ACL_HINT_ALLOW_NEW. +@@ -5136,9 +5146,6 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows) + ovn_lflow_add(lflows, od, stage, 1, "ct.est && ct_label.blocked == 0", + REGBIT_ACL_HINT_BLOCK " = 1; " + "next;"); +- +- /* In any case, advance to the next stage. */ +- ovn_lflow_add(lflows, od, stage, 0, "1", "next;"); + } + } + +@@ -5470,7 +5477,7 @@ static void + build_acls(struct ovn_datapath *od, struct hmap *lflows, + struct hmap *port_groups, const struct shash *meter_groups) + { +- bool has_stateful = (has_stateful_acl(od) || has_lb_vip(od)); ++ bool has_stateful = od->has_stateful_acl || od->has_lb_vip; + + /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by + * default. 
A related rule at priority 1 is added below if there +@@ -5739,7 +5746,7 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows) + } + } + +- if (has_lb_vip(od)) { ++ if (od->has_lb_vip) { + /* Ingress and Egress LB Table (Priority 65534). + * + * Send established traffic through conntrack for just NAT. */ +@@ -5860,7 +5867,7 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows) + ovn_lflow_add(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 0, "1", "next;"); + +- if (has_lb_vip(od)) { ++ if (od->has_lb_vip) { + /* Check if the packet needs to be hairpinned. */ + ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100, + "ip && ct.trk && ct.dnat", +@@ -6597,7 +6604,10 @@ build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od, + struct shash *meter_groups, + struct hmap *lbs) + { +- if (od->nbs) { ++ if (od->nbs) { ++ od->has_stateful_acl = ls_has_stateful_acl(od); ++ od->has_lb_vip = ls_has_lb_vip(od); ++ + build_pre_acls(od, lflows); + build_pre_lb(od, lflows, meter_groups, lbs); + build_pre_stateful(od, lflows); +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index 7240e22ba..64a788067 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -1912,6 +1912,83 @@ check_meter_by_name NOT meter_me__${acl1} meter_me__${acl2} + + AT_CLEANUP + ++AT_SETUP([ovn -- ACL skip hints for stateless config]) ++AT_KEYWORDS([acl]) ++ovn_start ++ ++check ovn-nbctl --wait=sb \ ++ -- ls-add ls \ ++ -- lsp-add ls lsp \ ++ -- acl-add ls from-lport 1 "ip" allow \ ++ -- acl-add ls to-lport 1 "ip" allow ++ ++AS_BOX([Check no match on ct_state with stateless ACLs]) ++AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | grep 'ct\.' 
| sort], [0], [dnl ++]) ++ ++AS_BOX([Check match ct_state with stateful ACLs]) ++check ovn-nbctl --wait=sb \ ++ -- acl-add ls from-lport 2 "udp" allow-related \ ++ -- acl-add ls to-lport 2 "udp" allow-related ++AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | grep 'ct\.' | sort], [0], [dnl ++ table=4 (ls_out_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=5 (ls_out_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) ++ table=5 (ls_out_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=5 (ls_out_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=5 (ls_out_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=6 (ls_in_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=3 , 
match=(!ct.est), action=(reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=7 (ls_in_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) ++ table=7 (ls_in_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=7 (ls_in_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=7 (ls_in_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++]) ++ ++AS_BOX([Check match ct_state with load balancer]) ++check ovn-nbctl --wait=sb \ ++ -- acl-del ls from-lport 2 "udp" \ ++ -- acl-del ls to-lport 2 "udp" \ ++ -- lb-add lb "10.0.0.1" "10.0.0.2" \ ++ -- ls-lb-add ls lb ++ ++AT_CHECK([ovn-sbctl lflow-list ls | grep -e ls_in_acl_hint -e ls_out_acl_hint -e ls_in_acl -e ls_out_acl | grep 'ct\.' 
| sort], [0], [dnl ++ table=4 (ls_out_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=4 (ls_out_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=5 (ls_out_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) ++ table=5 (ls_out_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=5 (ls_out_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=5 (ls_out_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++ table=6 (ls_in_acl_hint ), priority=1 , match=(ct.est && ct_label.blocked == 0), action=(reg0[[10]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=2 , match=(ct.est && ct_label.blocked == 1), action=(reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=3 , match=(!ct.est), action=(reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=4 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0), action=(reg0[[8]] = 1; reg0[[10]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=5 , match=(!ct.trk), action=(reg0[[8]] = 1; reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), 
priority=6 , match=(!ct.new && ct.est && !ct.rpl && ct_label.blocked == 1), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=6 (ls_in_acl_hint ), priority=7 , match=(ct.new && !ct.est), action=(reg0[[7]] = 1; reg0[[9]] = 1; next;) ++ table=7 (ls_in_acl ), priority=1 , match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))), action=(reg0[[1]] = 1; next;) ++ table=7 (ls_in_acl ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;) ++ table=7 (ls_in_acl ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;) ++ table=7 (ls_in_acl ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;) ++]) ++ ++AT_CLEANUP ++ + AT_SETUP([datapath requested-tnl-key]) + AT_KEYWORDS([requested tnl tunnel key keys]) + ovn_start +-- +2.29.2 + diff --git a/SOURCES/0001-northd-add-event-option-to-enable-controller_event-f.patch b/SOURCES/0001-northd-add-event-option-to-enable-controller_event-f.patch new file mode 100644 index 0000000..97c2dce --- /dev/null +++ b/SOURCES/0001-northd-add-event-option-to-enable-controller_event-f.patch @@ -0,0 +1,141 @@ +From 8bcee6092b931caa936ee8ac715cf6ec89d3f18d Mon Sep 17 00:00:00 2001 +Message-Id: <8bcee6092b931caa936ee8ac715cf6ec89d3f18d.1611836178.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Fri, 22 Jan 2021 18:25:54 +0100 +Subject: [PATCH] northd: add --event option to enable controller_event for + empty_lb + +Introduce the --event option to enable empty_lb controller event for a +load_balancer with no backends (doing so the option is per-lb and not +global). 
+ +$ovn-nbctl --event lb-add lb0 192.168.0.100:80 "" + +controller_event_en global variable is not removed for backward +compatibility but it is deprecated + +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + northd/ovn-northd.c | 5 ++++- + tests/ovn.at | 7 +++---- + utilities/ovn-nbctl.8.xml | 10 +++++++++- + utilities/ovn-nbctl.c | 13 ++++++++++++- + 4 files changed, 28 insertions(+), 7 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index f11894a4d..9f8fb3b95 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -5110,7 +5110,9 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, + struct nbrec_load_balancer *lb, + int pl, struct shash *meter_groups) + { +- if (!controller_event_en || lb_vip->n_backends || ++ bool controller_event = smap_get_bool(&lb->options, "event", false) || ++ controller_event_en; /* deprecated */ ++ if (!controller_event || lb_vip->n_backends || + lb_vip->empty_backend_rej) { + return; + } +@@ -12853,6 +12855,7 @@ ovnnb_db_run(struct northd_context *ctx, + + use_logical_dp_groups = smap_get_bool(&nb->options, + "use_logical_dp_groups", false); ++ /* deprecated, use --event instead */ + controller_event_en = smap_get_bool(&nb->options, + "controller_event", false); + check_lsp_is_up = !smap_get_bool(&nb->options, +diff --git a/tests/ovn.at b/tests/ovn.at +index aab749300..a4fafa5a8 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -16856,16 +16856,15 @@ ovs-vsctl -- add-port br-int vif33 -- \ + options:rxq_pcap=hv$i/vif33-rx.pcap \ + ofport-request=33 + +-ovn-nbctl --wait=hv set NB_Global . 
options:controller_event=true +-ovn-nbctl lb-add lb0 192.168.1.100:80 "" ++ovn-nbctl --event lb-add lb0 192.168.1.100:80 "" + ovn-nbctl ls-lb-add sw0 lb0 + uuid_lb0=$(ovn-nbctl --bare --columns=_uuid find load_balancer name=lb0) + +-ovn-nbctl lb-add lb1 192.168.2.100:80 "" ++ovn-nbctl --event lb-add lb1 192.168.2.100:80 "" + ovn-nbctl lr-lb-add lr0 lb1 + uuid_lb1=$(ovn-nbctl --bare --columns=_uuid find load_balancer name=lb1) + +-ovn-nbctl lb-add lb2 [[2001::10]]:50051 "" ++ovn-nbctl --event lb-add lb2 [[2001::10]]:50051 "" + ovn-nbctl ls-lb-add sw0 lb2 + uuid_lb2=$(ovn-nbctl --bare --columns=_uuid find load_balancer name=lb2) + +diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml +index e6fec9980..6ed8bcb75 100644 +--- a/utilities/ovn-nbctl.8.xml ++++ b/utilities/ovn-nbctl.8.xml +@@ -905,7 +905,7 @@ + +

    Load Balancer Commands

    +
    +-
    [--may-exist | --add-duplicate | --reject] lb-add lb vip ips [protocol]
    ++
    [--may-exist | --add-duplicate | --reject | --event] lb-add lb vip ips [protocol]
    +
    +

    + Creates a new load balancer named lb with the provided +@@ -947,6 +947,14 @@ + empty_lb SB controller event for this load balancer. +

    + ++

    ++ If the load balancer is created with --event option and ++ it has no active backends, whenever the lb receives traffic, the event ++ is reported in the Controller_Event table in the SB db. ++ Please note --event option can't be specified with ++ --reject one. ++

    ++ +

    + The following example adds a load balancer. +

    +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index f61982879..47cb8db9d 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -2836,6 +2836,13 @@ nbctl_lb_add(struct ctl_context *ctx) + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; + bool add_duplicate = shash_find(&ctx->options, "--add-duplicate") != NULL; + bool empty_backend_rej = shash_find(&ctx->options, "--reject") != NULL; ++ bool empty_backend_event = shash_find(&ctx->options, "--event") != NULL; ++ ++ if (empty_backend_event && empty_backend_rej) { ++ ctl_error(ctx, ++ "--reject and --event can't specified at the same time"); ++ return; ++ } + + const char *lb_proto; + bool is_update_proto = false; +@@ -2953,6 +2960,10 @@ nbctl_lb_add(struct ctl_context *ctx) + const struct smap options = SMAP_CONST1(&options, "reject", "true"); + nbrec_load_balancer_set_options(lb, &options); + } ++ if (empty_backend_event) { ++ const struct smap options = SMAP_CONST1(&options, "event", "true"); ++ nbrec_load_balancer_set_options(lb, &options); ++ } + out: + ds_destroy(&lb_ips_new); + +@@ -6564,7 +6575,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { + nbctl_lr_nat_set_ext_ips, NULL, "--is-exempted", RW}, + /* load balancer commands. */ + { "lb-add", 3, 4, "LB VIP[:PORT] IP[:PORT]... 
[PROTOCOL]", NULL, +- nbctl_lb_add, NULL, "--may-exist,--add-duplicate,--reject", RW }, ++ nbctl_lb_add, NULL, "--may-exist,--add-duplicate,--reject,--event", RW }, + { "lb-del", 1, 2, "LB [VIP]", NULL, nbctl_lb_del, NULL, + "--if-exists", RW }, + { "lb-list", 0, 1, "[LB]", NULL, nbctl_lb_list, NULL, "", RO }, +-- +2.29.2 + diff --git a/SOURCES/0001-ofctrl-Rename-nb_cfg-to-req_cfg.patch b/SOURCES/0001-ofctrl-Rename-nb_cfg-to-req_cfg.patch new file mode 100644 index 0000000..cd61e64 --- /dev/null +++ b/SOURCES/0001-ofctrl-Rename-nb_cfg-to-req_cfg.patch @@ -0,0 +1,223 @@ +From c6f4b3a47571d87149b86c999b78509185da7647 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 13 Jan 2021 10:23:09 +0100 +Subject: [PATCH 1/3] ofctrl: Rename 'nb_cfg' to 'req_cfg'. + +A future commit will extend the ofctrl OVS barrier sync mechanism to +make it usable by multiple components. One of the use cases will be +'nb_cfg' sync but it may not be the only one. + +Signed-off-by: Dumitru Ceara +Acked-by: Mark Michelson +Acked-by: Numan Siddique +Signed-off-by: Numan Siddique +(cherry picked from upstream master commit 2f933fc560330022cfc816ed870da6e5847809c9) + +Change-Id: Iddeeeb6aaee189a3e9918426e0dce3f1dbf6ff49 +--- + controller/ofctrl.c | 70 ++++++++++++++++++++++----------------------- + controller/ofctrl.h | 4 +-- + controller/ovn-controller.c | 6 ++-- + 3 files changed, 40 insertions(+), 40 deletions(-) + +diff --git a/controller/ofctrl.c b/controller/ofctrl.c +index a1ac695..9d62e12 100644 +--- a/controller/ofctrl.c ++++ b/controller/ofctrl.c +@@ -268,13 +268,14 @@ enum ofctrl_state { + /* An in-flight update to the switch's flow table. + * + * When we receive a barrier reply from the switch with the given 'xid', we +- * know that the switch is caught up to northbound database sequence number +- * 'nb_cfg' (and make that available to the client via ofctrl_get_cur_cfg(), so +- * that it can store it into our Chassis record's nb_cfg column). 
*/ ++ * know that the switch is caught up to the requested sequence number ++ * 'req_cfg' (and make that available to the client via ofctrl_get_cur_cfg(), ++ * so that it can store it into external state, e.g., our Chassis record's ++ * nb_cfg column). */ + struct ofctrl_flow_update { + struct ovs_list list_node; /* In 'flow_updates'. */ + ovs_be32 xid; /* OpenFlow transaction ID for barrier. */ +- int64_t nb_cfg; /* Northbound database sequence number. */ ++ uint64_t req_cfg; /* Requested sequence number. */ + }; + + static struct ofctrl_flow_update * +@@ -286,8 +287,8 @@ ofctrl_flow_update_from_list_node(const struct ovs_list *list_node) + /* Currently in-flight updates. */ + static struct ovs_list flow_updates; + +-/* nb_cfg of latest committed flow update. */ +-static int64_t cur_cfg; ++/* req_cfg of latest committed flow update. */ ++static uint64_t cur_cfg; + + /* Current state. */ + static enum ofctrl_state state; +@@ -632,8 +633,8 @@ recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum ofptype type, + struct ofctrl_flow_update *fup = ofctrl_flow_update_from_list_node( + ovs_list_front(&flow_updates)); + if (fup->xid == oh->xid) { +- if (fup->nb_cfg >= cur_cfg) { +- cur_cfg = fup->nb_cfg; ++ if (fup->req_cfg >= cur_cfg) { ++ cur_cfg = fup->req_cfg; + } + ovs_list_remove(&fup->list_node); + free(fup); +@@ -763,7 +764,7 @@ ofctrl_destroy(void) + shash_destroy(&symtab); + } + +-int64_t ++uint64_t + ofctrl_get_cur_cfg(void) + { + return cur_cfg; +@@ -2024,28 +2025,28 @@ void + ofctrl_put(struct ovn_desired_flow_table *flow_table, + struct shash *pending_ct_zones, + const struct sbrec_meter_table *meter_table, +- int64_t nb_cfg, ++ uint64_t req_cfg, + bool flow_changed) + { + static bool skipped_last_time = false; +- static int64_t old_nb_cfg = 0; ++ static uint64_t old_req_cfg = 0; + bool need_put = false; + if (flow_changed || skipped_last_time || need_reinstall_flows) { + need_put = true; +- old_nb_cfg = nb_cfg; +- } else if (nb_cfg != old_nb_cfg) { +- /* 
nb_cfg changed since last ofctrl_put() call */ +- if (cur_cfg == old_nb_cfg) { ++ old_req_cfg = req_cfg; ++ } else if (req_cfg != old_req_cfg) { ++ /* req_cfg changed since last ofctrl_put() call */ ++ if (cur_cfg == old_req_cfg) { + /* If there are no updates pending, we were up-to-date already, +- * update with the new nb_cfg. ++ * update with the new req_cfg. + */ + if (ovs_list_is_empty(&flow_updates)) { +- cur_cfg = nb_cfg; +- old_nb_cfg = nb_cfg; ++ cur_cfg = req_cfg; ++ old_req_cfg = req_cfg; + } + } else { + need_put = true; +- old_nb_cfg = nb_cfg; ++ old_req_cfg = req_cfg; + } + } + +@@ -2187,24 +2188,23 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, + /* Track the flow update. */ + struct ofctrl_flow_update *fup, *prev; + LIST_FOR_EACH_REVERSE_SAFE (fup, prev, list_node, &flow_updates) { +- if (nb_cfg < fup->nb_cfg) { ++ if (req_cfg < fup->req_cfg) { + /* This ofctrl_flow_update is for a configuration later than +- * 'nb_cfg'. This should not normally happen, because it means +- * that 'nb_cfg' in the SB_Global table of the southbound +- * database decreased, and it should normally be monotonically +- * increasing. */ +- VLOG_WARN("nb_cfg regressed from %"PRId64" to %"PRId64, +- fup->nb_cfg, nb_cfg); ++ * 'req_cfg'. This should not normally happen, because it ++ * means that the local seqno decreased and it should normally ++ * be monotonically increasing. */ ++ VLOG_WARN("req_cfg regressed from %"PRId64" to %"PRId64, ++ fup->req_cfg, req_cfg); + ovs_list_remove(&fup->list_node); + free(fup); +- } else if (nb_cfg == fup->nb_cfg) { ++ } else if (req_cfg == fup->req_cfg) { + /* This ofctrl_flow_update is for the same configuration as +- * 'nb_cfg'. Probably, some change to the physical topology ++ * 'req_cfg'. Probably, some change to the physical topology + * means that we had to revise the OpenFlow flow table even + * though the logical topology did not change. 
Update fp->xid, + * so that we don't send a notification that we're up-to-date + * until we're really caught up. */ +- VLOG_DBG("advanced xid target for nb_cfg=%"PRId64, nb_cfg); ++ VLOG_DBG("advanced xid target for req_cfg=%"PRId64, req_cfg); + fup->xid = xid_; + goto done; + } else { +@@ -2216,18 +2216,18 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, + fup = xmalloc(sizeof *fup); + ovs_list_push_back(&flow_updates, &fup->list_node); + fup->xid = xid_; +- fup->nb_cfg = nb_cfg; ++ fup->req_cfg = req_cfg; + done:; + } else if (!ovs_list_is_empty(&flow_updates)) { +- /* Getting up-to-date with 'nb_cfg' didn't require any extra flow table +- * changes, so whenever we get up-to-date with the most recent flow +- * table update, we're also up-to-date with 'nb_cfg'. */ ++ /* Getting up-to-date with 'req_cfg' didn't require any extra flow ++ * table changes, so whenever we get up-to-date with the most recent ++ * flow table update, we're also up-to-date with 'req_cfg'. */ + struct ofctrl_flow_update *fup = ofctrl_flow_update_from_list_node( + ovs_list_back(&flow_updates)); +- fup->nb_cfg = nb_cfg; ++ fup->req_cfg = req_cfg; + } else { + /* We were completely up-to-date before and still are. 
*/ +- cur_cfg = nb_cfg; ++ cur_cfg = req_cfg; + } + + flow_table->change_tracked = true; +diff --git a/controller/ofctrl.h b/controller/ofctrl.h +index 64b0ea5..8876956 100644 +--- a/controller/ofctrl.h ++++ b/controller/ofctrl.h +@@ -55,12 +55,12 @@ enum mf_field_id ofctrl_get_mf_field_id(void); + void ofctrl_put(struct ovn_desired_flow_table *, + struct shash *pending_ct_zones, + const struct sbrec_meter_table *, +- int64_t nb_cfg, ++ uint64_t nb_cfg, + bool flow_changed); + bool ofctrl_can_put(void); + void ofctrl_wait(void); + void ofctrl_destroy(void); +-int64_t ofctrl_get_cur_cfg(void); ++uint64_t ofctrl_get_cur_cfg(void); + + void ofctrl_ct_flush_zone(uint16_t zone_id); + +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 7551287..42883b4 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -798,11 +798,11 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, + } + } + +-static int64_t ++static uint64_t + get_nb_cfg(const struct sbrec_sb_global_table *sb_global_table, + unsigned int cond_seqno, unsigned int expected_cond_seqno) + { +- static int64_t nb_cfg = 0; ++ static uint64_t nb_cfg = 0; + + /* Delay getting nb_cfg if there are monitor condition changes + * in flight. 
It might be that those changes would instruct the +@@ -826,7 +826,7 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn, + const struct sbrec_chassis_private *chassis, + const struct ovsrec_bridge *br_int, + unsigned int delay_nb_cfg_report, +- int64_t cur_cfg) ++ uint64_t cur_cfg) + { + if (!cur_cfg) { + return; +-- +1.8.3.1 + diff --git a/SOURCES/0001-ovn-controller-Fix-wrong-conj_id-match-flows-when-ca.patch b/SOURCES/0001-ovn-controller-Fix-wrong-conj_id-match-flows-when-ca.patch new file mode 100644 index 0000000..5bb885d --- /dev/null +++ b/SOURCES/0001-ovn-controller-Fix-wrong-conj_id-match-flows-when-ca.patch @@ -0,0 +1,205 @@ +From 11f75ad5bef3d2f6a9d72a8b27468fc3ccfc3d7e Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Fri, 22 Jan 2021 13:52:29 +0530 +Subject: [PATCH] ovn-controller: Fix wrong conj_id match flows when caching is + enabled. + +When the below ACL is added - +ovn-nbctl acl-add ls1 to-lport 3 + '(ip4.src==10.0.0.1 || ip4.src==10.0.0.2) && + (ip4.dst == 10.0.0.3 || ip4.dst == 10.0.0.4)' allow + +ovn-controller installs the below OF flows + +table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.4 actions=conjunction(2,1/2) +table=45, priority=1003,ip,metadata=0x1,nw_dst=10.0.0.3 actions=conjunction(2,1/2) +table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.2 actions=conjunction(2,2/2) +table=45, priority=1003,ip,metadata=0x1,nw_src=10.0.0.1 actions=conjunction(2,2/2) +table=45, priority=1003,conj_id=2,ip,metadata=0x1 actions=resubmit(,46) + +When a full recompute is triggered, ovn-controller deletes the last +OF flow with the match conj_id=2 and adds the below OF flow + +table=45, priority=1003,conj_id=3,ip,metadata=0x1 actions=resubmit(,46) + +For subsequent recomputes, the conj_id keeps increasing by 1. + +This disrupts the traffic which matches on conjunction action flows. + +This patch fixes this issue.
+ +Fixes: 1213bc8270("ovn-controller: Cache logical flow expr matches.") +Suggested-by: Dumitru Ceara +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from master commit c83294970c62f662015a7979b12250580bee3001) +--- + controller/lflow.c | 30 ++++++++++++++---------------- + include/ovn/expr.h | 1 + + lib/expr.c | 19 +++++++++++++++++++ + tests/ovn.at | 28 ++++++++++++++++++++++++++++ + 4 files changed, 62 insertions(+), 16 deletions(-) + +diff --git a/controller/lflow.c b/controller/lflow.c +index c02585b1e..9f6aece9a 100644 +--- a/controller/lflow.c ++++ b/controller/lflow.c +@@ -668,9 +668,8 @@ update_conj_id_ofs(uint32_t *conj_id_ofs, uint32_t n_conjs) + static void + add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, + const struct sbrec_datapath_binding *dp, +- struct hmap *matches, size_t conj_id_ofs, +- uint8_t ptable, uint8_t output_ptable, +- struct ofpbuf *ovnacts, ++ struct hmap *matches, uint8_t ptable, ++ uint8_t output_ptable, struct ofpbuf *ovnacts, + bool ingress, struct lflow_ctx_in *l_ctx_in, + struct lflow_ctx_out *l_ctx_out) + { +@@ -708,9 +707,6 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, + struct expr_match *m; + HMAP_FOR_EACH (m, hmap_node, matches) { + match_set_metadata(&m->match, htonll(dp->tunnel_key)); +- if (m->match.wc.masks.conj_id) { +- m->match.flow.conj_id += conj_id_ofs; +- } + if (datapath_is_switch(dp)) { + unsigned int reg_index + = (ingress ? 
MFF_LOG_INPORT : MFF_LOG_OUTPORT) - MFF_REG0; +@@ -744,7 +740,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, + struct ofpact_conjunction *dst; + + dst = ofpact_put_CONJUNCTION(&conj); +- dst->id = src->id + conj_id_ofs; ++ dst->id = src->id; + dst->clause = src->clause; + dst->n_clauses = src->n_clauses; + } +@@ -915,9 +911,9 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, + return true; + } + +- add_matches_to_flow_table(lflow, dp, &matches, *l_ctx_out->conj_id_ofs, +- ptable, output_ptable, &ovnacts, ingress, +- l_ctx_in, l_ctx_out); ++ expr_matches_prepare(&matches, *l_ctx_out->conj_id_ofs); ++ add_matches_to_flow_table(lflow, dp, &matches, ptable, output_ptable, ++ &ovnacts, ingress, l_ctx_in, l_ctx_out); + + ovnacts_free(ovnacts.data, ovnacts.size); + ofpbuf_uninit(&ovnacts); +@@ -930,10 +926,11 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, + lflow_cache_get(l_ctx_out->lflow_cache_map, lflow); + + if (lc && lc->type == LCACHE_T_MATCHES) { +- /* 'matches' is cached. No need to do expr parsing. ++ /* 'matches' is cached. No need to do expr parsing and no need ++ * to call expr_matches_prepare() to update the conj ids. + * Add matches to flow table and return. */ +- add_matches_to_flow_table(lflow, dp, lc->expr_matches, lc->conj_id_ofs, +- ptable, output_ptable, &ovnacts, ingress, ++ add_matches_to_flow_table(lflow, dp, lc->expr_matches, ptable, ++ output_ptable, &ovnacts, ingress, + l_ctx_in, l_ctx_out); + ovnacts_free(ovnacts.data, ovnacts.size); + ofpbuf_uninit(&ovnacts); +@@ -1009,10 +1006,11 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow, + } + } + ++ expr_matches_prepare(matches, lc->conj_id_ofs); ++ + /* Encode OVN logical actions into OpenFlow. 
*/ +- add_matches_to_flow_table(lflow, dp, matches, lc->conj_id_ofs, +- ptable, output_ptable, &ovnacts, ingress, +- l_ctx_in, l_ctx_out); ++ add_matches_to_flow_table(lflow, dp, matches, ptable, output_ptable, ++ &ovnacts, ingress, l_ctx_in, l_ctx_out); + ovnacts_free(ovnacts.data, ovnacts.size); + ofpbuf_uninit(&ovnacts); + +diff --git a/include/ovn/expr.h b/include/ovn/expr.h +index 0a83ec7a8..c2c821818 100644 +--- a/include/ovn/expr.h ++++ b/include/ovn/expr.h +@@ -477,6 +477,7 @@ uint32_t expr_to_matches(const struct expr *, + const void *aux, + struct hmap *matches); + void expr_matches_destroy(struct hmap *matches); ++void expr_matches_prepare(struct hmap *matches, uint32_t conj_id_ofs); + void expr_matches_print(const struct hmap *matches, FILE *); + + /* Action parsing helper. */ +diff --git a/lib/expr.c b/lib/expr.c +index 4566d9110..796e88ac7 100644 +--- a/lib/expr.c ++++ b/lib/expr.c +@@ -3125,6 +3125,25 @@ expr_to_matches(const struct expr *expr, + return n_conjs; + } + ++/* Prepares the expr matches in the hmap 'matches' by updating the ++ * conj id offsets specified in 'conj_id_ofs'. ++ */ ++void ++expr_matches_prepare(struct hmap *matches, uint32_t conj_id_ofs) ++{ ++ struct expr_match *m; ++ HMAP_FOR_EACH (m, hmap_node, matches) { ++ if (m->match.wc.masks.conj_id) { ++ m->match.flow.conj_id += conj_id_ofs; ++ } ++ ++ for (size_t i = 0; i < m->n; i++) { ++ struct cls_conjunction *src = &m->conjunctions[i]; ++ src->id += conj_id_ofs; ++ } ++ } ++} ++ + /* Destroys all of the 'struct expr_match'es in 'matches', as well as the + * 'matches' hmap itself. */ + void +diff --git a/tests/ovn.at b/tests/ovn.at +index e2d2d8a9d..b890592ae 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -13824,6 +13824,34 @@ reset_pcap_file hv1-vif2 hv1/vif2 + rm -f 2.packets + > 2.expected + ++# Trigger recompute and make sure that the traffic still works as expected. 
++as hv1 ovn-appctl -t ovn-controller recompute ++ ++# Traffic 10.0.0.1, 10.0.0.2 -> 10.0.0.3, 10.0.0.4 should be allowed. ++for src in `seq 1 2`; do ++ for dst in `seq 3 4`; do ++ sip=`ip_to_hex 10 0 0 $src` ++ dip=`ip_to_hex 10 0 0 $dst` ++ ++ test_ip 1 f00000000001 f00000000002 $sip $dip 2 ++ done ++done ++ ++# Traffic 10.0.0.1, 10.0.0.2 -> 10.0.0.5 should be dropped. ++dip=`ip_to_hex 10 0 0 5` ++for src in `seq 1 2`; do ++ sip=`ip_to_hex 10 0 0 $src` ++ ++ test_ip 1 f00000000001 f00000000002 $sip $dip ++done ++ ++cat 2.expected > expout ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++AT_CHECK([cat 2.packets], [0], [expout]) ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 2.packets ++> 2.expected ++ + # Add two less restrictive allow ACLs for src IP 10.0.0.1. + ovn-nbctl acl-add ls1 to-lport 3 'ip4.src==10.0.0.1 || ip4.src==10.0.0.1' allow + ovn-nbctl acl-add ls1 to-lport 3 'ip4.src==10.0.0.1' allow +-- +2.29.2 + diff --git a/SOURCES/0001-ovn-ctl-Add-support-for-ovsdb-server-disable-file-co.patch b/SOURCES/0001-ovn-ctl-Add-support-for-ovsdb-server-disable-file-co.patch new file mode 100644 index 0000000..1b1f9c0 --- /dev/null +++ b/SOURCES/0001-ovn-ctl-Add-support-for-ovsdb-server-disable-file-co.patch @@ -0,0 +1,75 @@ +From 15eefe928ea2a51c7ad03356821f0665ca6abb6d Mon Sep 17 00:00:00 2001 +From: Ilya Maximets +Date: Thu, 21 Jan 2021 20:23:27 +0100 +Subject: [PATCH] ovn-ctl: Add support for ovsdb-server + --disable-file-column-diff. + +There is a change of a database file format in OVS version 2.15 that +doesn't allow older versions of ovsdb-server to read the database file +modified by the ovsdb-server version 2.15 or later. This also affects +runtime communications between servers in active-backup and cluster +service models. + +For the upgrade scenario OVS introduced special command line argument +for ovsdb-server: --disable-file-column-diff. 
+More details in ovsdb(7) or here: + https://docs.openvswitch.org/en/latest/ref/ovsdb.7/#upgrading-from-version-2-14-and-earlier-to-2-15-and-later + +In order to support upgrades of OVN databases introducing new option +'--ovsdb-disable-file-column-diff' for ovn-ctl script that will pass +aforementioned argument to ovsdb-server processes. + +To simplify upgrades for users, ovn-ctl will add requested argument +to ovsdb-server only if ovsdb-server actually supports it. + +Signed-off-by: Ilya Maximets +Acked-by: Han Zhou +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit 668b0d02aeff42d361bad36c2b247195c8d2c6f0) + +Change-Id: Id5a70bd76f24f13ab0357f8e3c40159f77bc3141 +--- + utilities/ovn-ctl | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/utilities/ovn-ctl b/utilities/ovn-ctl +index c44201ccf..211c764a6 100755 +--- a/utilities/ovn-ctl ++++ b/utilities/ovn-ctl +@@ -251,6 +251,11 @@ $cluster_remote_port + + [ "$OVN_USER" != "" ] && set "$@" --user "$OVN_USER" + ++ if test X"$OVSDB_DISABLE_FILE_COLUMN_DIFF" = Xyes; then ++ (ovsdb-server --help | grep -q disable-file-column-diff) \ ++ && set "$@" --disable-file-column-diff ++ fi ++ + if test X"$detach" != Xno; then + set "$@" --detach --monitor + else +@@ -715,6 +720,8 @@ set_defaults () { + OVSDB_NB_WRAPPER= + OVSDB_SB_WRAPPER= + ++ OVSDB_DISABLE_FILE_COLUMN_DIFF=no ++ + OVN_USER= + + OVN_CONTROLLER_LOG="-vconsole:emer -vsyslog:err -vfile:info" +@@ -932,6 +939,11 @@ Options: + --ovs-user="user[:group]" pass the --user flag to ovs daemons + --ovsdb-nb-wrapper=WRAPPER run with a wrapper like valgrind for debugging + --ovsdb-sb-wrapper=WRAPPER run with a wrapper like valgrind for debugging ++ --ovsdb-disable-file-column-diff=no|yes ++ Specifies whether or not ovsdb-server ++ processes should be started with ++ --disable-file-column-diff. ++ More details in ovsdb(7).
(default: no) + -h, --help display this help message + + File location options: +-- +2.29.2 + diff --git a/SOURCES/0001-ovn-nbctl-add-bfd-option-to-lr-route-add.patch b/SOURCES/0001-ovn-nbctl-add-bfd-option-to-lr-route-add.patch new file mode 100644 index 0000000..2502d9f --- /dev/null +++ b/SOURCES/0001-ovn-nbctl-add-bfd-option-to-lr-route-add.patch @@ -0,0 +1,183 @@ +From 97b58dde0f92fc83165a6db816456073f5ddf727 Mon Sep 17 00:00:00 2001 +Message-Id: <97b58dde0f92fc83165a6db816456073f5ddf727.1612349784.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Fri, 29 Jan 2021 23:45:19 +0100 +Subject: [PATCH] ovn-nbctl: add --bfd option to lr-route-add + +Introduce the --bfd option to lr-route-add command. +If the BFD session UUID is provided, it will be used for the OVN route +otherwise the next-hop will be used to perform a lookup in the OVN BFD +table. +If the lookup fails and outport is specified, a new entry in the BFD table +will be created using the nexthop as dst_ip and outport as logical_port. 
+ +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + tests/ovn-northd.at | 17 ++++++++++---- + tests/system-ovn.at | 5 ++-- + utilities/ovn-nbctl.8.xml | 11 +++++++++ + utilities/ovn-nbctl.c | 49 ++++++++++++++++++++++++++++++++++++++- + 4 files changed, 73 insertions(+), 9 deletions(-) + +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2342,7 +2342,7 @@ AT_KEYWORDS([northd-bfd]) + ovn_start + + check ovn-nbctl --wait=sb lr-add r0 +-for i in $(seq 1 4); do ++for i in $(seq 1 5); do + check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24 + check ovn-nbctl --wait=sb ls-add sw$i + check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0 +@@ -2387,17 +2387,24 @@ check_column 1000 bfd min_tx logical_por + check_column 1000 bfd min_rx logical_port=r0-sw1 + check_column 100 bfd detect_mult logical_port=r0-sw1 + +-check ovn-nbctl lr-route-add r0 100.0.0.0/8 192.168.10.2 +-route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/8") +-check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid ++check ovn-nbctl --bfd=$uuid lr-route-add r0 100.0.0.0/8 192.168.10.2 + check_column down bfd status logical_port=r0-sw1 + AT_CHECK([ovn-nbctl lr-route-list r0 | grep 192.168.10.2 | grep -q bfd],[0]) + ++check ovn-nbctl --bfd lr-route-add r0 200.0.0.0/8 192.168.20.2 ++check_column down bfd status logical_port=r0-sw2 ++AT_CHECK([ovn-nbctl lr-route-list r0 | grep 192.168.20.2 | grep -q bfd],[0]) ++ ++check ovn-nbctl --bfd lr-route-add r0 240.0.0.0/8 192.168.50.2 r0-sw5 ++check_column down bfd status logical_port=r0-sw5 ++AT_CHECK([ovn-nbctl lr-route-list r0 | grep 192.168.50.2 | grep -q bfd],[0]) ++ ++route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/8") + check ovn-nbctl clear logical_router_static_route $route_uuid bfd + check_column admin_down bfd status logical_port=r0-sw1 + + ovn-nbctl destroy bfd $uuid +-check_row_count bfd 2 ++check_row_count bfd 3 + + AT_CLEANUP 
+ +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -5606,10 +5606,9 @@ NS_CHECK_EXEC([server], [bfdd-control al + Allowing connections from 172.16.1.1 + ]) + +-uuid=$(ovn-nbctl create bfd logical_port=rp-public dst_ip=172.16.1.50 min_tx=250 min_rx=250 detect_mult=10) +-check ovn-nbctl lr-route-add R1 100.0.0.0/8 172.16.1.50 ++check ovn-nbctl --bfd lr-route-add R1 100.0.0.0/8 172.16.1.50 rp-public ++uuid=$(fetch_column nb:bfd _uuid logical_port="rp-public") + route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/8") +-check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid + check ovn-nbctl --wait=hv sync + + wait_column "up" nb:bfd status logical_port=rp-public +--- a/utilities/ovn-nbctl.8.xml ++++ b/utilities/ovn-nbctl.8.xml +@@ -659,6 +659,7 @@ +
    +
    [--may-exist] [--policy=POLICY] + [--ecmp] [--ecmp-symmetric-reply] ++ [--bfd[=UUID]] + lr-route-add router + prefix nexthop [port]
    +
    +@@ -696,6 +697,16 @@ +

    + +

    ++ --bfd option is used to link a BFD session to the ++ OVN route. If the BFD session UUID is provided, it will be used ++ for the OVN route otherwise the next-hop will be used to perform ++ a lookup in the OVN BFD table. ++ If the lookup fails and port is specified, a new entry ++ in the BFD table will be created using the nexthop as ++ dst_ip and port as logical_port. ++

    ++ ++

    + It is an error if a route with prefix and + POLICY already exists, unless --may-exist, + --ecmp, or --ecmp-symmetric-reply is +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -3957,6 +3957,29 @@ nbctl_lr_route_add(struct ctl_context *c + goto cleanup; + } + ++ struct shash_node *bfd = shash_find(&ctx->options, "--bfd"); ++ const struct nbrec_bfd *nb_bt = NULL; ++ if (bfd) { ++ if (bfd->data) { ++ struct uuid bfd_uuid; ++ if (uuid_from_string(&bfd_uuid, bfd->data)) { ++ nb_bt = nbrec_bfd_get_for_uuid(ctx->idl, &bfd_uuid); ++ } ++ if (!nb_bt) { ++ ctl_error(ctx, "no entry found in the BFD table"); ++ goto cleanup; ++ } ++ } else { ++ const struct nbrec_bfd *iter; ++ NBREC_BFD_FOR_EACH (iter, ctx->idl) { ++ if (!strcmp(iter->dst_ip, next_hop)) { ++ nb_bt = iter; ++ break; ++ } ++ } ++ } ++ } ++ + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; + bool ecmp_symmetric_reply = shash_find(&ctx->options, + "--ecmp-symmetric-reply") != NULL; +@@ -4011,6 +4034,18 @@ nbctl_lr_route_add(struct ctl_context *c + if (policy) { + nbrec_logical_router_static_route_set_policy(route, policy); + } ++ if (bfd) { ++ if (!nb_bt) { ++ if (ctx->argc != 5) { ++ ctl_error(ctx, "insert entry in the BFD table failed"); ++ goto cleanup; ++ } ++ nb_bt = nbrec_bfd_insert(ctx->txn); ++ nbrec_bfd_set_dst_ip(nb_bt, next_hop); ++ nbrec_bfd_set_logical_port(nb_bt, ctx->argv[4]); ++ } ++ nbrec_logical_router_static_route_set_bfd(route, nb_bt); ++ } + free(rt_prefix); + goto cleanup; + } +@@ -4035,6 +4070,18 @@ nbctl_lr_route_add(struct ctl_context *c + } + + nbrec_logical_router_update_static_routes_addvalue(lr, route); ++ if (bfd) { ++ if (!nb_bt) { ++ if (ctx->argc != 5) { ++ ctl_error(ctx, "insert entry in the BFD table failed"); ++ goto cleanup; ++ } ++ nb_bt = nbrec_bfd_insert(ctx->txn); ++ nbrec_bfd_set_dst_ip(nb_bt, next_hop); ++ nbrec_bfd_set_logical_port(nb_bt, ctx->argv[4]); ++ } ++ nbrec_logical_router_static_route_set_bfd(route, nb_bt); ++ } + + cleanup: 
+ free(next_hop); +@@ -6548,7 +6595,7 @@ static const struct ctl_command_syntax n + /* logical router route commands. */ + { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", NULL, + nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply," +- "--policy=", RW }, ++ "--policy=,--bfd?", RW }, + { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", NULL, + nbctl_lr_route_del, NULL, "--if-exists,--policy=", RW }, + { "lr-route-list", 1, 1, "ROUTER", NULL, nbctl_lr_route_list, NULL, diff --git a/SOURCES/0001-ovn-nbctl-add-bfd-report-to-lr-route-list-command.patch b/SOURCES/0001-ovn-nbctl-add-bfd-report-to-lr-route-list-command.patch new file mode 100644 index 0000000..c46782d --- /dev/null +++ b/SOURCES/0001-ovn-nbctl-add-bfd-report-to-lr-route-list-command.patch @@ -0,0 +1,46 @@ +From 8770192b3b4732e02679f723ea5903a515c6bd8a Mon Sep 17 00:00:00 2001 +Message-Id: <8770192b3b4732e02679f723ea5903a515c6bd8a.1611833004.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Fri, 15 Jan 2021 00:00:24 +0100 +Subject: [PATCH 1/2] ovn-nbctl: add bfd report to lr-route-list command + +Introduce bfd info to lr-route-list command + +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + tests/ovn-northd.at | 1 + + utilities/ovn-nbctl.c | 5 +++++ + 2 files changed, 6 insertions(+) + +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index d22cad863..8597ca1b9 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2391,6 +2391,7 @@ check ovn-nbctl lr-route-add r0 100.0.0.0/8 192.168.10.2 + route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/8") + check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid + check_column down bfd status logical_port=r0-sw1 ++AT_CHECK([ovn-nbctl lr-route-list r0 | grep 192.168.10.2 | grep -q bfd],[0]) + + check ovn-nbctl clear logical_router_static_route $route_uuid bfd + check_column admin_down bfd status logical_port=r0-sw1 +diff --git 
a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 94e7eedeb..788b1972e 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -5502,6 +5502,11 @@ print_route(const struct nbrec_logical_router_static_route *route, struct ds *s) + if (smap_get(&route->external_ids, "ic-learned-route")) { + ds_put_format(s, " (learned)"); + } ++ ++ if (route->bfd) { ++ ds_put_cstr(s, " bfd"); ++ } ++ + ds_put_char(s, '\n'); + } + +-- +2.29.2 + diff --git a/SOURCES/0001-ovn-northd-Move-lswitch-ARP-ND-Responder-to-function.patch b/SOURCES/0001-ovn-northd-Move-lswitch-ARP-ND-Responder-to-function.patch new file mode 100644 index 0000000..667ff72 --- /dev/null +++ b/SOURCES/0001-ovn-northd-Move-lswitch-ARP-ND-Responder-to-function.patch @@ -0,0 +1,575 @@ +From f21c1b7a467a691847b5552d4570af706fcc5bb0 Mon Sep 17 00:00:00 2001 +Message-Id: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:28 +0000 +Subject: [PATCH 01/16] ovn-northd: Move lswitch ARP/ND Responder to functions. + +Move arp/nd responder lflow processing to per-iterable functions. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 496 +++++++++++++++++++++++--------------------- + 1 file changed, 260 insertions(+), 236 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index b377dffa1..d17cc55ac 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6770,7 +6770,7 @@ is_vlan_transparent(const struct ovn_datapath *od) + static void + build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct hmap *lflows, struct hmap *mcgroups, +- struct hmap *igmp_groups, struct hmap *lbs) ++ struct hmap *igmp_groups) + { + /* This flow table structure is documented in ovn-northd(8), so please + * update ovn-northd.8.xml if you change anything. 
*/ +@@ -6778,240 +6778,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ds match = DS_EMPTY_INITIALIZER; + struct ds actions = DS_EMPTY_INITIALIZER; + struct ovn_datapath *od; +- +- /* Ingress table 13: ARP/ND responder, skip requests coming from localnet +- * and vtep ports. (priority 100); see ovn-northd.8.xml for the +- * rationale. */ + struct ovn_port *op; +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp) { +- continue; +- } +- +- if ((!strcmp(op->nbsp->type, "localnet")) || +- (!strcmp(op->nbsp->type, "vtep"))) { +- ds_clear(&match); +- ds_put_format(&match, "inport == %s", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, +- 100, ds_cstr(&match), "next;", +- &op->nbsp->header_); +- } +- } +- +- /* Ingress table 13: ARP/ND responder, reply for known IPs. +- * (priority 50). */ +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp) { +- continue; +- } +- +- if (!strcmp(op->nbsp->type, "virtual")) { +- /* Handle +- * - GARPs for virtual ip which belongs to a logical port +- * of type 'virtual' and bind that port. +- * +- * - ARP reply from the virtual ip which belongs to a logical +- * port of type 'virtual' and bind that port. +- * */ +- ovs_be32 ip; +- const char *virtual_ip = smap_get(&op->nbsp->options, +- "virtual-ip"); +- const char *virtual_parents = smap_get(&op->nbsp->options, +- "virtual-parents"); +- if (!virtual_ip || !virtual_parents || +- !ip_parse(virtual_ip, &ip)) { +- continue; +- } +- +- char *tokstr = xstrdup(virtual_parents); +- char *save_ptr = NULL; +- char *vparent; +- for (vparent = strtok_r(tokstr, ",", &save_ptr); vparent != NULL; +- vparent = strtok_r(NULL, ",", &save_ptr)) { +- struct ovn_port *vp = ovn_port_find(ports, vparent); +- if (!vp || vp->od != op->od) { +- /* vparent name should be valid and it should belong +- * to the same logical switch. 
*/ +- continue; +- } +- +- ds_clear(&match); +- ds_put_format(&match, "inport == \"%s\" && " +- "((arp.op == 1 && arp.spa == %s && " +- "arp.tpa == %s) || (arp.op == 2 && " +- "arp.spa == %s))", +- vparent, virtual_ip, virtual_ip, +- virtual_ip); +- ds_clear(&actions); +- ds_put_format(&actions, +- "bind_vport(%s, inport); " +- "next;", +- op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_ARP_ND_RSP, 100, +- ds_cstr(&match), ds_cstr(&actions), +- &vp->nbsp->header_); +- } +- +- free(tokstr); +- } else { +- /* +- * Add ARP/ND reply flows if either the +- * - port is up and it doesn't have 'unknown' address defined or +- * - port type is router or +- * - port type is localport +- */ +- if (check_lsp_is_up && +- !lsp_is_up(op->nbsp) && !lsp_is_router(op->nbsp) && +- strcmp(op->nbsp->type, "localport")) { +- continue; +- } +- +- if (lsp_is_external(op->nbsp) || op->has_unknown) { +- continue; +- } +- +- for (size_t i = 0; i < op->n_lsp_addrs; i++) { +- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { +- ds_clear(&match); +- ds_put_format(&match, "arp.tpa == %s && arp.op == 1", +- op->lsp_addrs[i].ipv4_addrs[j].addr_s); +- ds_clear(&actions); +- ds_put_format(&actions, +- "eth.dst = eth.src; " +- "eth.src = %s; " +- "arp.op = 2; /* ARP reply */ " +- "arp.tha = arp.sha; " +- "arp.sha = %s; " +- "arp.tpa = arp.spa; " +- "arp.spa = %s; " +- "outport = inport; " +- "flags.loopback = 1; " +- "output;", +- op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, +- op->lsp_addrs[i].ipv4_addrs[j].addr_s); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_ARP_ND_RSP, 50, +- ds_cstr(&match), +- ds_cstr(&actions), +- &op->nbsp->header_); +- +- /* Do not reply to an ARP request from the port that owns +- * the address (otherwise a DHCP client that ARPs to check +- * for a duplicate address will fail). Instead, forward +- * it the usual way. +- * +- * (Another alternative would be to simply drop the packet. 
+- * If everything is working as it is configured, then this +- * would produce equivalent results, since no one should +- * reply to the request. But ARPing for one's own IP +- * address is intended to detect situations where the +- * network is not working as configured, so dropping the +- * request would frustrate that intent.) */ +- ds_put_format(&match, " && inport == %s", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_ARP_ND_RSP, 100, +- ds_cstr(&match), "next;", +- &op->nbsp->header_); +- } +- +- /* For ND solicitations, we need to listen for both the +- * unicast IPv6 address and its all-nodes multicast address, +- * but always respond with the unicast IPv6 address. */ +- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { +- ds_clear(&match); +- ds_put_format(&match, +- "nd_ns && ip6.dst == {%s, %s} && nd.target == %s", +- op->lsp_addrs[i].ipv6_addrs[j].addr_s, +- op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s, +- op->lsp_addrs[i].ipv6_addrs[j].addr_s); +- +- ds_clear(&actions); +- ds_put_format(&actions, +- "%s { " +- "eth.src = %s; " +- "ip6.src = %s; " +- "nd.target = %s; " +- "nd.tll = %s; " +- "outport = inport; " +- "flags.loopback = 1; " +- "output; " +- "};", +- lsp_is_router(op->nbsp) ? "nd_na_router" : "nd_na", +- op->lsp_addrs[i].ea_s, +- op->lsp_addrs[i].ipv6_addrs[j].addr_s, +- op->lsp_addrs[i].ipv6_addrs[j].addr_s, +- op->lsp_addrs[i].ea_s); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_ARP_ND_RSP, 50, +- ds_cstr(&match), +- ds_cstr(&actions), +- &op->nbsp->header_); +- +- /* Do not reply to a solicitation from the port that owns +- * the address (otherwise DAD detection will fail). */ +- ds_put_format(&match, " && inport == %s", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_ARP_ND_RSP, 100, +- ds_cstr(&match), "next;", +- &op->nbsp->header_); +- } +- } +- } +- } +- +- /* Ingress table 13: ARP/ND responder, by default goto next. 
+- * (priority 0)*/ +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbs) { +- continue; +- } +- +- ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;"); +- } +- +- /* Ingress table 13: ARP/ND responder for service monitor source ip. +- * (priority 110)*/ +- struct ovn_northd_lb *lb; +- HMAP_FOR_EACH (lb, hmap_node, lbs) { +- for (size_t i = 0; i < lb->n_vips; i++) { +- struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i]; +- if (!lb_vip_nb->lb_health_check) { +- continue; +- } +- +- for (size_t j = 0; j < lb_vip_nb->n_backends; j++) { +- struct ovn_northd_lb_backend *backend_nb = +- &lb_vip_nb->backends_nb[j]; +- if (!backend_nb->op || !backend_nb->svc_mon_src_ip) { +- continue; +- } +- +- ds_clear(&match); +- ds_put_format(&match, "arp.tpa == %s && arp.op == 1", +- backend_nb->svc_mon_src_ip); +- ds_clear(&actions); +- ds_put_format(&actions, +- "eth.dst = eth.src; " +- "eth.src = %s; " +- "arp.op = 2; /* ARP reply */ " +- "arp.tha = arp.sha; " +- "arp.sha = %s; " +- "arp.tpa = arp.spa; " +- "arp.spa = %s; " +- "outport = inport; " +- "flags.loopback = 1; " +- "output;", +- svc_monitor_mac, svc_monitor_mac, +- backend_nb->svc_mon_src_ip); +- ovn_lflow_add_with_hint(lflows, +- backend_nb->op->od, +- S_SWITCH_IN_ARP_ND_RSP, 110, +- ds_cstr(&match), ds_cstr(&actions), +- &lb->nlb->header_); +- } +- } +- } + + + /* Logical switch ingress table 14 and 15: DHCP options and response +@@ -7471,6 +7238,251 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od, + } + } + ++/* Ingress table 13: ARP/ND responder, skip requests coming from localnet ++ * and vtep ports. (priority 100); see ovn-northd.8.xml for the ++ * rationale. 
*/ ++ ++static void ++build_lswitch_arp_nd_responder_skip_local(struct ovn_port *op, ++ struct hmap *lflows, ++ struct ds *match) ++{ ++ if (op->nbsp) { ++ if ((!strcmp(op->nbsp->type, "localnet")) || ++ (!strcmp(op->nbsp->type, "vtep"))) { ++ ds_clear(match); ++ ds_put_format(match, "inport == %s", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, ++ 100, ds_cstr(match), "next;", ++ &op->nbsp->header_); ++ } ++ } ++} ++ ++/* Ingress table 13: ARP/ND responder, reply for known IPs. ++ * (priority 50). */ ++static void ++build_lswitch_arp_nd_responder_known_ips(struct ovn_port *op, ++ struct hmap *lflows, ++ struct hmap *ports, ++ struct ds *actions, ++ struct ds *match) ++{ ++ if (op->nbsp) { ++ if (!strcmp(op->nbsp->type, "virtual")) { ++ /* Handle ++ * - GARPs for virtual ip which belongs to a logical port ++ * of type 'virtual' and bind that port. ++ * ++ * - ARP reply from the virtual ip which belongs to a logical ++ * port of type 'virtual' and bind that port. ++ * */ ++ ovs_be32 ip; ++ const char *virtual_ip = smap_get(&op->nbsp->options, ++ "virtual-ip"); ++ const char *virtual_parents = smap_get(&op->nbsp->options, ++ "virtual-parents"); ++ if (!virtual_ip || !virtual_parents || ++ !ip_parse(virtual_ip, &ip)) { ++ return; ++ } ++ ++ char *tokstr = xstrdup(virtual_parents); ++ char *save_ptr = NULL; ++ char *vparent; ++ for (vparent = strtok_r(tokstr, ",", &save_ptr); vparent != NULL; ++ vparent = strtok_r(NULL, ",", &save_ptr)) { ++ struct ovn_port *vp = ovn_port_find(ports, vparent); ++ if (!vp || vp->od != op->od) { ++ /* vparent name should be valid and it should belong ++ * to the same logical switch. 
*/ ++ continue; ++ } ++ ++ ds_clear(match); ++ ds_put_format(match, "inport == \"%s\" && " ++ "((arp.op == 1 && arp.spa == %s && " ++ "arp.tpa == %s) || (arp.op == 2 && " ++ "arp.spa == %s))", ++ vparent, virtual_ip, virtual_ip, ++ virtual_ip); ++ ds_clear(actions); ++ ds_put_format(actions, ++ "bind_vport(%s, inport); " ++ "next;", ++ op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_ARP_ND_RSP, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &vp->nbsp->header_); ++ } ++ ++ free(tokstr); ++ } else { ++ /* ++ * Add ARP/ND reply flows if either the ++ * - port is up and it doesn't have 'unknown' address defined or ++ * - port type is router or ++ * - port type is localport ++ */ ++ if (check_lsp_is_up && ++ !lsp_is_up(op->nbsp) && !lsp_is_router(op->nbsp) && ++ strcmp(op->nbsp->type, "localport")) { ++ return; ++ } ++ ++ if (lsp_is_external(op->nbsp) || op->has_unknown) { ++ return; ++ } ++ ++ for (size_t i = 0; i < op->n_lsp_addrs; i++) { ++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { ++ ds_clear(match); ++ ds_put_format(match, "arp.tpa == %s && arp.op == 1", ++ op->lsp_addrs[i].ipv4_addrs[j].addr_s); ++ ds_clear(actions); ++ ds_put_format(actions, ++ "eth.dst = eth.src; " ++ "eth.src = %s; " ++ "arp.op = 2; /* ARP reply */ " ++ "arp.tha = arp.sha; " ++ "arp.sha = %s; " ++ "arp.tpa = arp.spa; " ++ "arp.spa = %s; " ++ "outport = inport; " ++ "flags.loopback = 1; " ++ "output;", ++ op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, ++ op->lsp_addrs[i].ipv4_addrs[j].addr_s); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_ARP_ND_RSP, 50, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ ++ /* Do not reply to an ARP request from the port that owns ++ * the address (otherwise a DHCP client that ARPs to check ++ * for a duplicate address will fail). Instead, forward ++ * it the usual way. ++ * ++ * (Another alternative would be to simply drop the packet. 
++ * If everything is working as it is configured, then this ++ * would produce equivalent results, since no one should ++ * reply to the request. But ARPing for one's own IP ++ * address is intended to detect situations where the ++ * network is not working as configured, so dropping the ++ * request would frustrate that intent.) */ ++ ds_put_format(match, " && inport == %s", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_ARP_ND_RSP, 100, ++ ds_cstr(match), "next;", ++ &op->nbsp->header_); ++ } ++ ++ /* For ND solicitations, we need to listen for both the ++ * unicast IPv6 address and its all-nodes multicast address, ++ * but always respond with the unicast IPv6 address. */ ++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { ++ ds_clear(match); ++ ds_put_format(match, ++ "nd_ns && ip6.dst == {%s, %s} && nd.target == %s", ++ op->lsp_addrs[i].ipv6_addrs[j].addr_s, ++ op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s, ++ op->lsp_addrs[i].ipv6_addrs[j].addr_s); ++ ++ ds_clear(actions); ++ ds_put_format(actions, ++ "%s { " ++ "eth.src = %s; " ++ "ip6.src = %s; " ++ "nd.target = %s; " ++ "nd.tll = %s; " ++ "outport = inport; " ++ "flags.loopback = 1; " ++ "output; " ++ "};", ++ lsp_is_router(op->nbsp) ? "nd_na_router" : "nd_na", ++ op->lsp_addrs[i].ea_s, ++ op->lsp_addrs[i].ipv6_addrs[j].addr_s, ++ op->lsp_addrs[i].ipv6_addrs[j].addr_s, ++ op->lsp_addrs[i].ea_s); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_ARP_ND_RSP, 50, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ ++ /* Do not reply to a solicitation from the port that owns ++ * the address (otherwise DAD detection will fail). */ ++ ds_put_format(match, " && inport == %s", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_ARP_ND_RSP, 100, ++ ds_cstr(match), "next;", ++ &op->nbsp->header_); ++ } ++ } ++ } ++ } ++} ++ ++/* Ingress table 13: ARP/ND responder, by default goto next. 
++ * (priority 0)*/ ++static void ++build_lswitch_arp_nd_responder_default(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbs) { ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;"); ++ } ++} ++ ++/* Ingress table 13: ARP/ND responder for service monitor source ip. ++ * (priority 110)*/ ++static void ++build_lswitch_arp_nd_service_monitor(struct ovn_northd_lb *lb, ++ struct hmap *lflows, ++ struct ds *actions, ++ struct ds *match) ++{ ++ for (size_t i = 0; i < lb->n_vips; i++) { ++ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i]; ++ if (!lb_vip_nb->lb_health_check) { ++ continue; ++ } ++ ++ for (size_t j = 0; j < lb_vip_nb->n_backends; j++) { ++ struct ovn_northd_lb_backend *backend_nb = ++ &lb_vip_nb->backends_nb[j]; ++ if (!backend_nb->op || !backend_nb->svc_mon_src_ip) { ++ continue; ++ } ++ ++ ds_clear(match); ++ ds_put_format(match, "arp.tpa == %s && arp.op == 1", ++ backend_nb->svc_mon_src_ip); ++ ds_clear(actions); ++ ds_put_format(actions, ++ "eth.dst = eth.src; " ++ "eth.src = %s; " ++ "arp.op = 2; /* ARP reply */ " ++ "arp.tha = arp.sha; " ++ "arp.sha = %s; " ++ "arp.tpa = arp.spa; " ++ "arp.spa = %s; " ++ "outport = inport; " ++ "flags.loopback = 1; " ++ "output;", ++ svc_monitor_mac, svc_monitor_mac, ++ backend_nb->svc_mon_src_ip); ++ ovn_lflow_add_with_hint(lflows, ++ backend_nb->op->od, ++ S_SWITCH_IN_ARP_ND_RSP, 110, ++ ds_cstr(match), ds_cstr(actions), ++ &lb->nlb->header_); ++ } ++ } ++} ++ + + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. +@@ -11322,6 +11334,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_fwd_group_lflows(od, lsi->lflows); + build_lswitch_lflows_admission_control(od, lsi->lflows); + build_lswitch_input_port_sec_od(od, lsi->lflows); ++ build_lswitch_arp_nd_responder_default(od, lsi->lflows); + + /* Build Logical Router Flows. 
*/ + build_adm_ctrl_flows_for_lrouter(od, lsi->lflows); +@@ -11352,7 +11365,12 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, + /* Build Logical Switch Flows. */ + build_lswitch_input_port_sec_op(op, lsi->lflows, &lsi->actions, + &lsi->match); +- ++ build_lswitch_arp_nd_responder_skip_local(op, lsi->lflows, ++ &lsi->match); ++ build_lswitch_arp_nd_responder_known_ips(op, lsi->lflows, ++ lsi->ports, ++ &lsi->actions, ++ &lsi->match); + /* Build Logical Router Flows. */ + build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, + &lsi->actions); +@@ -11379,6 +11397,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + { + struct ovn_datapath *od; + struct ovn_port *op; ++ struct ovn_northd_lb *lb; + + char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); + +@@ -11405,6 +11424,11 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + HMAP_FOR_EACH (op, key_node, ports) { + build_lswitch_and_lrouter_iterate_by_op(op, &lsi); + } ++ HMAP_FOR_EACH (lb, hmap_node, lbs) { ++ build_lswitch_arp_nd_service_monitor(lb, lsi.lflows, ++ &lsi.actions, ++ &lsi.match); ++ } + free(svc_check_match); + + ds_destroy(&lsi.match); +@@ -11412,7 +11436,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + + /* Legacy lswitch build - to be migrated. */ + build_lswitch_flows(datapaths, ports, lflows, mcgroups, +- igmp_groups, lbs); ++ igmp_groups); + + /* Legacy lrouter build - to be migrated. 
*/ + build_lrouter_flows(datapaths, ports, lflows, meter_groups, lbs); +-- +2.29.2 + diff --git a/SOURCES/0001-ovn-trace-fix-trigger_event-warning.patch b/SOURCES/0001-ovn-trace-fix-trigger_event-warning.patch new file mode 100644 index 0000000..73b57f5 --- /dev/null +++ b/SOURCES/0001-ovn-trace-fix-trigger_event-warning.patch @@ -0,0 +1,67 @@ +From 19fc17eee44cb899e62292a8f6ff4cfe98815ea6 Mon Sep 17 00:00:00 2001 +Message-Id: <19fc17eee44cb899e62292a8f6ff4cfe98815ea6.1610114400.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Tue, 5 Jan 2021 15:37:37 +0100 +Subject: [PATCH] ovn-trace: fix trigger_event warning. + +Fix the following ovn-trace warning triggered by controller_event: + +00001|ovntrace|WARN|trigger_event(event = "empty_lb_backends", meter = "", + vip = "192.168.0.100:80", protocol = "tcp", + load_balancer = "2c5462a7-b6ca-4b02-86c9-b9aa98a570e8"); +parsing actions failed (Syntax error at `vip' expecting empty_lb_backends option name.) + +The issue can be triggered running the following reproducer in ovn-sandbox: + +$./ovn-setup.sh +$ovn-nbctl lb-add lb0 192.168.0.100:80 "" +$ovn-nbctl ls-lb-add sw0 lb0 +$ovn-nbctl --wait=hv set NB_Global . 
options:controller_event=true +$ovn-trace sw0 'inport == "sw0-port1" && eth.src == 50:54:00:00:00:01 && ip4.src==192.168.0.2 && eth.dst == 00:00:00:00:ff:01 && ip4.dst==192.168.0.100 && tcp && tcp.dst==80' + +Acked-by: Dumitru Ceara +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + tests/ovn.at | 2 ++ + utilities/ovn-trace.c | 4 ++++ + 2 files changed, 6 insertions(+) + +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -16889,6 +16889,8 @@ AT_CHECK_UNQUOTED([ovn-sbctl get control + "$uuid_lb2" + ]) + ++AT_CHECK_UNQUOTED([ovn-trace sw0 'inport == "sw0-p11" && eth.src == 00:00:00:00:00:11 && ip4.dst == 192.168.1.100 && tcp && tcp.dst == 80' | grep -q 'event = "empty_lb_backends"'], [0]) ++ + OVN_CLEANUP([hv1], [hv2]) + AT_CLEANUP + +--- a/utilities/ovn-trace.c ++++ b/utilities/ovn-trace.c +@@ -478,6 +478,7 @@ static struct shash port_groups; + static struct hmap dhcp_opts; /* Contains "struct gen_opts_map"s. */ + static struct hmap dhcpv6_opts; /* Contains "struct gen_opts_map"s. */ + static struct hmap nd_ra_opts; /* Contains "struct gen_opts_map"s. */ ++static struct controller_event_options event_opts; + + static struct ovntrace_datapath * + ovntrace_datapath_find_by_sb_uuid(const struct uuid *sb_uuid) +@@ -901,6 +902,7 @@ parse_lflow_for_datapath(const struct sb + .dhcp_opts = &dhcp_opts, + .dhcpv6_opts = &dhcpv6_opts, + .nd_ra_opts = &nd_ra_opts, ++ .controller_event_opts = &event_opts, + .pipeline = (!strcmp(sblf->pipeline, "ingress") + ? 
OVNACT_P_INGRESS + : OVNACT_P_EGRESS), +@@ -1006,6 +1008,8 @@ read_gen_opts(void) + + hmap_init(&nd_ra_opts); + nd_ra_opts_init(&nd_ra_opts); ++ ++ controller_event_opts_init(&event_opts); + } + + static void diff --git a/SOURCES/0002-binding-Always-delete-child-port-bindings-first.patch b/SOURCES/0002-binding-Always-delete-child-port-bindings-first.patch new file mode 100644 index 0000000..b46be2e --- /dev/null +++ b/SOURCES/0002-binding-Always-delete-child-port-bindings-first.patch @@ -0,0 +1,154 @@ +From 47afa0664d6a41d0a75a65f0ba927974d957cb62 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Mon, 18 Jan 2021 17:50:33 +0100 +Subject: [PATCH 2/2] binding: Always delete child port bindings first. + +When incrementally processing changes, child Port Bindings (container +and virtual ports) must be deleted first, before their parents, because +they need to be removed from their parent's children hash to avoid +parents with children with NULL port bindings. + +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from master commit de8030e6abc7b51dd2ee48bbe2c76592ef8b064c) + +Change-Id: I382f463b757df1ff0f3b5b0e6ec2050d4e7811ed +--- + controller/binding.c | 75 +++++++++++++++++++++++++++++++++++++++++++++------- + tests/ovn.at | 18 +++++++++++++ + 2 files changed, 83 insertions(+), 10 deletions(-) + +diff --git a/controller/binding.c b/controller/binding.c +index 3512a1d..c8e8591 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -2147,13 +2147,26 @@ bool + binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out) + { +- bool handled = true; ++ /* Run the tracked port binding loop twice to ensure correctness: ++ * 1. First to handle deleted changes. This is split in four sub-parts ++ * because child local bindings must be cleaned up first: ++ * a. Container ports first. ++ * b. Then virtual ports. ++ * c. Then regular VIFs. ++ * d. Last other ports. ++ * 2. 
Second to handle add/update changes. ++ */ ++ struct shash deleted_container_pbs = ++ SHASH_INITIALIZER(&deleted_container_pbs); ++ struct shash deleted_virtual_pbs = ++ SHASH_INITIALIZER(&deleted_virtual_pbs); ++ struct shash deleted_vif_pbs = ++ SHASH_INITIALIZER(&deleted_vif_pbs); ++ struct shash deleted_other_pbs = ++ SHASH_INITIALIZER(&deleted_other_pbs); + const struct sbrec_port_binding *pb; ++ bool handled = true; + +- /* Run the tracked port binding loop twice. One to handle deleted +- * changes. And another to handle add/update changes. +- * This will ensure correctness. +- */ + SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (pb, + b_ctx_in->port_binding_table) { + if (!sbrec_port_binding_is_deleted(pb)) { +@@ -2161,18 +2174,60 @@ binding_handle_port_binding_changes(struct binding_ctx_in *b_ctx_in, + } + + enum en_lport_type lport_type = get_lport_type(pb); +- if (lport_type == LP_VIF || lport_type == LP_VIRTUAL) { +- handled = handle_deleted_vif_lport(pb, lport_type, b_ctx_in, +- b_ctx_out); ++ ++ if (lport_type == LP_VIF) { ++ if (is_lport_container(pb)) { ++ shash_add(&deleted_container_pbs, pb->logical_port, pb); ++ } else { ++ shash_add(&deleted_vif_pbs, pb->logical_port, pb); ++ } ++ } else if (lport_type == LP_VIRTUAL) { ++ shash_add(&deleted_virtual_pbs, pb->logical_port, pb); + } else { +- handle_deleted_lport(pb, b_ctx_in, b_ctx_out); ++ shash_add(&deleted_other_pbs, pb->logical_port, pb); ++ } ++ } ++ ++ struct shash_node *node; ++ struct shash_node *node_next; ++ SHASH_FOR_EACH_SAFE (node, node_next, &deleted_container_pbs) { ++ handled = handle_deleted_vif_lport(node->data, LP_VIF, b_ctx_in, ++ b_ctx_out); ++ shash_delete(&deleted_container_pbs, node); ++ if (!handled) { ++ goto delete_done; + } ++ } + ++ SHASH_FOR_EACH_SAFE (node, node_next, &deleted_virtual_pbs) { ++ handled = handle_deleted_vif_lport(node->data, LP_VIRTUAL, b_ctx_in, ++ b_ctx_out); ++ shash_delete(&deleted_virtual_pbs, node); + if (!handled) { +- break; ++ goto delete_done; ++ 
} ++ } ++ ++ SHASH_FOR_EACH_SAFE (node, node_next, &deleted_vif_pbs) { ++ handled = handle_deleted_vif_lport(node->data, LP_VIF, b_ctx_in, ++ b_ctx_out); ++ shash_delete(&deleted_vif_pbs, node); ++ if (!handled) { ++ goto delete_done; + } + } + ++ SHASH_FOR_EACH_SAFE (node, node_next, &deleted_other_pbs) { ++ handle_deleted_lport(node->data, b_ctx_in, b_ctx_out); ++ shash_delete(&deleted_other_pbs, node); ++ } ++ ++delete_done: ++ shash_destroy(&deleted_container_pbs); ++ shash_destroy(&deleted_virtual_pbs); ++ shash_destroy(&deleted_vif_pbs); ++ shash_destroy(&deleted_other_pbs); ++ + if (!handled) { + return false; + } +diff --git a/tests/ovn.at b/tests/ovn.at +index 2cdc036..e2d2d8a 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -22194,6 +22194,24 @@ check ovn-nbctl lsp-add ls2 lsp-cont1 lsp1 1 + check ovn-nbctl --wait=hv sync + check_row_count Port_Binding 1 logical_port=lsp-cont1 chassis=$ch + ++AS_BOX([delete both OVN VIF and OVN container port]) ++as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl lsp-del lsp1 \ ++ -- lsp-del lsp-cont1 ++check ovn-nbctl --wait=sb sync ++as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++AS_BOX([readd both OVN VIF and OVN container port]) ++as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-nbctl lsp-add ls1 lsp1 \ ++ -- lsp-add ls2 lsp-cont1 lsp1 1 ++check ovn-nbctl --wait=sb sync ++as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++check ovn-nbctl --wait=hv sync ++wait_row_count Port_Binding 1 logical_port=lsp1 chassis=$ch ++wait_row_count Port_Binding 1 logical_port=lsp-cont1 chassis=$ch ++ + OVN_CLEANUP([hv1]) + AT_CLEANUP + +-- +1.8.3.1 + diff --git a/SOURCES/0002-binding-Set-Port_Binding.up-only-if-supported.patch b/SOURCES/0002-binding-Set-Port_Binding.up-only-if-supported.patch new file mode 100644 index 0000000..d55e65a --- /dev/null +++ b/SOURCES/0002-binding-Set-Port_Binding.up-only-if-supported.patch @@ -0,0 +1,218 @@ +From 3de7959b9018f53abd06320bc7f1a43ec216db7e Mon Sep 17 
00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 3 Feb 2021 20:36:41 +0100 +Subject: [PATCH 2/4] binding: Set Port_Binding.up only if supported. + +The supported upgrade procedure is to always upgrade ovn-controllers +first and OVN DBs and ovn-northd later. This leads however to the +situation when ovn-controller might try to set the Port_Binding.up field +while the Southbound DB isn't yet aware of this field which would +trigger transaction failures and control plane/data plane outages. + +To avoid such situations ovn-controller only sets the Port_Binding.up +field if it was explicitly set to 'false'. This ensures that the SB +database was already upgraded. + +On the ovn-northd side, as soon as ovn-northd is upgraded it will update +all existing Port_Bindings and explicitly set 'Port_Binding.up' to +false, implicitly notifying ovn-controller that it is safe to write to +the field. + +Reported-by: Numan Siddique +Fixes: 4d3cb42b076b ("binding: Set Logical_Switch_Port.up when all OVS flows are installed.") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit 8b45fc9b2269df68566e47eee9cdd5f043b595d3) + +Change-Id: I6fb8b59419466bbe43f0d993966d8316320c6327 +--- + controller-vtep/binding.c | 6 ++++-- + controller/binding.c | 33 ++++++++++++++++++++++---------- + northd/ovn-northd.c | 8 ++++++++ + tests/ovn.at | 48 ++++++++++++++++++++++++++++++++++++++++++++++- + 4 files changed, 82 insertions(+), 13 deletions(-) + +diff --git a/controller-vtep/binding.c b/controller-vtep/binding.c +index d28a598..01d5a16 100644 +--- a/controller-vtep/binding.c ++++ b/controller-vtep/binding.c +@@ -110,9 +110,11 @@ update_pb_chassis(const struct sbrec_port_binding *port_binding_rec, + chassis_rec->name); + } + +- bool up = true; + sbrec_port_binding_set_chassis(port_binding_rec, chassis_rec); +- sbrec_port_binding_set_up(port_binding_rec, &up, 1); ++ if (port_binding_rec->n_up) { ++ bool up = true; ++ 
sbrec_port_binding_set_up(port_binding_rec, &up, 1); ++ } + } + } + +diff --git a/controller/binding.c b/controller/binding.c +index 353debe..efaa109 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -870,6 +870,11 @@ get_lport_type(const struct sbrec_port_binding *pb) + * container and virtual ports). + * Otherwise request a notification to be sent when the OVS flows + * corresponding to 'pb' have been installed. ++ * ++ * Note: ++ * Updates (directly or through a notification) the 'pb->up' field only if ++ * it's explicitly set to 'false'. ++ * This is to ensure compatibility with older versions of ovn-northd. + */ + static void + claimed_lport_set_up(const struct sbrec_port_binding *pb, +@@ -885,7 +890,7 @@ claimed_lport_set_up(const struct sbrec_port_binding *pb, + return; + } + +- if (pb->chassis != chassis_rec) { ++ if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) { + binding_iface_bound_add(pb->logical_port); + } + } +@@ -973,7 +978,10 @@ release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, + sbrec_port_binding_set_virtual_parent(pb, NULL); + } + +- sbrec_port_binding_set_up(pb, NULL, 0); ++ if (pb->n_up) { ++ bool up = false; ++ sbrec_port_binding_set_up(pb, &up, 1); ++ } + update_lport_tracking(pb, tracked_datapaths); + binding_iface_released_add(pb->logical_port); + VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port); +@@ -2503,7 +2511,10 @@ binding_seqno_run(struct shash *local_bindings) + ovsrec_interface_update_external_ids_delkey( + lb->iface, OVN_INSTALLED_EXT_ID); + } +- sbrec_port_binding_set_up(lb->pb, NULL, 0); ++ if (lb->pb->n_up) { ++ bool up = false; ++ sbrec_port_binding_set_up(lb->pb, &up, 1); ++ } + simap_put(&binding_iface_seqno_map, lb->name, new_seqno); + } + sset_delete(&binding_iface_bound_set, SSET_NODE_FROM_NAME(iface_id)); +@@ -2536,7 +2547,6 @@ binding_seqno_install(struct shash *local_bindings) + + SIMAP_FOR_EACH_SAFE (node, node_next, &binding_iface_seqno_map) { + 
struct shash_node *lb_node = shash_find(local_bindings, node->name); +- bool up = true; + + if (!lb_node) { + goto del_seqno; +@@ -2554,12 +2564,15 @@ binding_seqno_install(struct shash *local_bindings) + ovsrec_interface_update_external_ids_setkey(lb->iface, + OVN_INSTALLED_EXT_ID, + "true"); +- sbrec_port_binding_set_up(lb->pb, &up, 1); +- +- struct shash_node *child_node; +- SHASH_FOR_EACH (child_node, &lb->children) { +- struct local_binding *lb_child = child_node->data; +- sbrec_port_binding_set_up(lb_child->pb, &up, 1); ++ if (lb->pb->n_up) { ++ bool up = true; ++ ++ sbrec_port_binding_set_up(lb->pb, &up, 1); ++ struct shash_node *child_node; ++ SHASH_FOR_EACH (child_node, &lb->children) { ++ struct local_binding *lb_child = child_node->data; ++ sbrec_port_binding_set_up(lb_child->pb, &up, 1); ++ } + } + + del_seqno: +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 307ee9c..0dc920b 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -3329,6 +3329,14 @@ ovn_port_update_sbrec(struct northd_context *ctx, + if (op->tunnel_key != op->sb->tunnel_key) { + sbrec_port_binding_set_tunnel_key(op->sb, op->tunnel_key); + } ++ ++ /* ovn-controller will update 'Port_Binding.up' only if it was explicitly ++ * set to 'false'. 
++ */ ++ if (!op->sb->n_up) { ++ bool up = false; ++ sbrec_port_binding_set_up(op->sb, &up, 1); ++ } + } + + /* Remove mac_binding entries that refer to logical_ports which are +diff --git a/tests/ovn.at b/tests/ovn.at +index 1956f5c..2ef056b 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -23697,7 +23697,7 @@ check ovn-nbctl ls-add ls + AS_BOX([add OVS port for existing LSP]) + check ovn-nbctl lsp-add ls lsp1 + check ovn-nbctl --wait=hv sync +-check_column "[]" Port_Binding up logical_port=lsp1 ++check_column "false" Port_Binding up logical_port=lsp1 + + check ovs-vsctl add-port br-int lsp1 -- set Interface lsp1 external-ids:iface-id=lsp1 + check_column "true" Port_Binding up logical_port=lsp1 +@@ -23712,5 +23712,51 @@ check_column "true" Port_Binding up logical_port=lsp2 + wait_column "true" nb:Logical_Switch_Port up name=lsp2 + OVS_WAIT_UNTIL([test `ovs-vsctl get Interface lsp2 external_ids:ovn-installed` = '"true"']) + ++AS_BOX([ovn-controller should not reset Port_Binding.up without northd]) ++# Pause northd and clear the "up" field to simulate older ovn-northd ++# versions writing to the Southbound DB. ++as northd ovn-appctl -t ovn-northd pause ++as northd-backup ovn-appctl -t ovn-northd pause ++ ++as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-sbctl clear Port_Binding lsp1 up ++as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++# Forcefully release the Port_Binding so ovn-controller reclaims it. ++# Make sure the Port_Binding.up field is not updated though. ++check ovn-sbctl clear Port_Binding lsp1 chassis ++hv1_uuid=$(fetch_column Chassis _uuid name=hv1) ++wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp1 ++check_column "" Port_Binding up logical_port=lsp1 ++ ++# Once resumed, northd should explicitly set Port_Binding.up to 'false' and ++# ovn-controller sets it to 'true' as soon as the update is processed. 
++as northd ovn-appctl -t ovn-northd resume ++as northd-backup ovn-appctl -t ovn-northd resume ++wait_column "true" Port_Binding up logical_port=lsp1 ++wait_column "true" nb:Logical_Switch_Port up name=lsp1 ++ ++AS_BOX([ovn-controller should reset Port_Binding.up - from NULL]) ++# If Port_Binding.up is cleared externally, ovn-northd resets it to 'false' ++# and ovn-controller finally sets it to 'true' once the update is processed. ++as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-sbctl clear Port_Binding lsp1 up ++check ovn-nbctl --wait=sb sync ++wait_column "false" nb:Logical_Switch_Port up name=lsp1 ++as hv1 ovn-appctl -t ovn-controller debug/resume ++wait_column "true" Port_Binding up logical_port=lsp1 ++wait_column "true" nb:Logical_Switch_Port up name=lsp1 ++ ++AS_BOX([ovn-controller should reset Port_Binding.up - from false]) ++# If Port_Binding.up is externally set to 'false', ovn-controller should set ++# it to 'true' once the update is processed. ++as hv1 ovn-appctl -t ovn-controller debug/pause ++check ovn-sbctl set Port_Binding lsp1 up=false ++check ovn-nbctl --wait=sb sync ++wait_column "false" nb:Logical_Switch_Port up name=lsp1 ++as hv1 ovn-appctl -t ovn-controller debug/resume ++wait_column "true" Port_Binding up logical_port=lsp1 ++wait_column "true" nb:Logical_Switch_Port up name=lsp1 ++ + OVN_CLEANUP([hv1]) + AT_CLEANUP +-- +1.8.3.1 + diff --git a/SOURCES/0002-controller-Implement-a-generic-barrier-based-on-ofct.patch b/SOURCES/0002-controller-Implement-a-generic-barrier-based-on-ofct.patch new file mode 100644 index 0000000..c7a8387 --- /dev/null +++ b/SOURCES/0002-controller-Implement-a-generic-barrier-based-on-ofct.patch @@ -0,0 +1,960 @@ +From 2bafeec1b98cfa813fa75dfafa74fdacae8e32c4 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 13 Jan 2021 10:23:19 +0100 +Subject: [PATCH 2/3] controller: Implement a generic barrier based on ofctrl + cur_cfg sync. 
+ +A new module, 'ofctrl-seqno', is added to implement this generic +barrier. Other modules can register their own types of seqno update +requests. The barrier implementation ensures that a seqno update +request is acked (returned by ofctrl_acked_seqnos_get()) only if the +OVS flow operations that have been requested when the seqno update +request was queued have been processed by OVS. + +For now, the only user of this barrier is the main ovn-controller +module but a future commit will use it too in order to mark +Port_Bindings and OVS interfaces as "fully installed". + +This commit also adds unit tests for the new 'ofctrl-seqno' module. +The unit test structure is inspired by Mark Michelson's patch: +http://patchwork.ozlabs.org/project/ovn/patch/20201216182421.234772-3-mmichels@redhat.com/ + +Signed-off-by: Dumitru Ceara +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique +(cherry picked from upstream master commit c93c626248c120eeaffafd323aef323d3b2507ab) + +Change-Id: I500750d4756267e3f62746da33275a22cb1af26f +--- + controller/automake.mk | 2 + + controller/ofctrl-seqno.c | 254 +++++++++++++++++++++++++++++++++++++++++ + controller/ofctrl-seqno.h | 49 ++++++++ + controller/ovn-controller.c | 41 +++++-- + controller/test-ofctrl-seqno.c | 194 +++++++++++++++++++++++++++++++ + tests/automake.mk | 8 +- + tests/ovn-ofctrl-seqno.at | 226 ++++++++++++++++++++++++++++++++++++ + tests/testsuite.at | 1 + + 8 files changed, 765 insertions(+), 10 deletions(-) + create mode 100644 controller/ofctrl-seqno.c + create mode 100644 controller/ofctrl-seqno.h + create mode 100644 controller/test-ofctrl-seqno.c + create mode 100644 tests/ovn-ofctrl-seqno.at + +diff --git a/controller/automake.mk b/controller/automake.mk +index 45e1bdd..480578e 100644 +--- a/controller/automake.mk ++++ b/controller/automake.mk +@@ -18,6 +18,8 @@ controller_ovn_controller_SOURCES = \ + controller/lport.h \ + controller/ofctrl.c \ + controller/ofctrl.h \ ++ controller/ofctrl-seqno.c \ ++ 
controller/ofctrl-seqno.h \ + controller/pinctrl.c \ + controller/pinctrl.h \ + controller/patch.c \ +diff --git a/controller/ofctrl-seqno.c b/controller/ofctrl-seqno.c +new file mode 100644 +index 0000000..c9334b0 +--- /dev/null ++++ b/controller/ofctrl-seqno.c +@@ -0,0 +1,254 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#include ++ ++#include "hash.h" ++#include "ofctrl-seqno.h" ++#include "openvswitch/list.h" ++#include "util.h" ++ ++/* A sequence number update request, i.e., when the barrier corresponding to ++ * the 'flow_cfg' sequence number is replied to by OVS then it is safe ++ * to inform the application that the 'req_cfg' seqno has been processed. ++ */ ++struct ofctrl_seqno_update { ++ struct ovs_list list_node; /* In 'ofctrl_seqno_updates'. */ ++ size_t seqno_type; /* Application specific seqno type. ++ * Relevant only for 'req_cfg'. ++ */ ++ uint64_t flow_cfg; /* The seqno that needs to be acked by OVS ++ * before 'req_cfg' can be acked for the ++ * application. ++ */ ++ uint64_t req_cfg; /* Application specific seqno. */ ++}; ++ ++/* List of in flight sequence number updates. */ ++static struct ovs_list ofctrl_seqno_updates; ++ ++/* Last sequence number request sent to OVS. */ ++static uint64_t ofctrl_req_seqno; ++ ++/* State of seqno requests for a given application seqno type. 
*/ ++struct ofctrl_seqno_state { ++ struct ovs_list acked_cfgs; /* Acked requests since the last time the ++ * application consumed acked requests. ++ */ ++ uint64_t cur_cfg; /* Last acked application seqno. */ ++ uint64_t req_cfg; /* Last requested application seqno. */ ++}; ++ ++/* Per application seqno type states. */ ++static size_t n_ofctrl_seqno_states; ++static struct ofctrl_seqno_state *ofctrl_seqno_states; ++ ++/* ofctrl_acked_seqnos related static function prototypes. */ ++static void ofctrl_acked_seqnos_init(struct ofctrl_acked_seqnos *seqnos, ++ uint64_t last_acked); ++static void ofctrl_acked_seqnos_add(struct ofctrl_acked_seqnos *seqnos, ++ uint32_t val); ++ ++/* ofctrl_seqno_update related static function prototypes. */ ++static void ofctrl_seqno_update_create__(size_t seqno_type, uint64_t req_cfg); ++static void ofctrl_seqno_update_list_destroy(struct ovs_list *seqno_list); ++static void ofctrl_seqno_cfg_run(size_t seqno_type, ++ struct ofctrl_seqno_update *update); ++ ++/* Returns the collection of acked ofctrl_seqno_update requests of type ++ * 'seqno_type'. It's the responsibility of the caller to free memory by ++ * calling ofctrl_acked_seqnos_destroy(). 
++ */ ++struct ofctrl_acked_seqnos * ++ofctrl_acked_seqnos_get(size_t seqno_type) ++{ ++ struct ofctrl_acked_seqnos *acked_seqnos = xmalloc(sizeof *acked_seqnos); ++ struct ofctrl_seqno_state *state = &ofctrl_seqno_states[seqno_type]; ++ struct ofctrl_seqno_update *update; ++ ++ ofctrl_acked_seqnos_init(acked_seqnos, state->cur_cfg); ++ ++ ovs_assert(seqno_type < n_ofctrl_seqno_states); ++ LIST_FOR_EACH_POP (update, list_node, &state->acked_cfgs) { ++ ofctrl_acked_seqnos_add(acked_seqnos, update->req_cfg); ++ free(update); ++ } ++ return acked_seqnos; ++} ++ ++void ++ofctrl_acked_seqnos_destroy(struct ofctrl_acked_seqnos *seqnos) ++{ ++ if (!seqnos) { ++ return; ++ } ++ ++ struct ofctrl_ack_seqno *seqno_node; ++ HMAP_FOR_EACH_POP (seqno_node, node, &seqnos->acked) { ++ free(seqno_node); ++ } ++ hmap_destroy(&seqnos->acked); ++ free(seqnos); ++} ++ ++/* Returns true if 'val' is one of the acked sequence numbers in 'seqnos'. */ ++bool ++ofctrl_acked_seqnos_contains(const struct ofctrl_acked_seqnos *seqnos, ++ uint32_t val) ++{ ++ struct ofctrl_ack_seqno *sn; ++ ++ HMAP_FOR_EACH_WITH_HASH (sn, node, hash_int(val, 0), &seqnos->acked) { ++ if (sn->seqno == val) { ++ return true; ++ } ++ } ++ return false; ++} ++ ++void ++ofctrl_seqno_init(void) ++{ ++ ovs_list_init(&ofctrl_seqno_updates); ++} ++ ++/* Adds a new type of application specific seqno updates. */ ++size_t ++ofctrl_seqno_add_type(void) ++{ ++ size_t new_type = n_ofctrl_seqno_states; ++ n_ofctrl_seqno_states++; ++ ++ struct ofctrl_seqno_state *new_states = ++ xzalloc(n_ofctrl_seqno_states * sizeof *new_states); ++ ++ for (size_t i = 0; i < n_ofctrl_seqno_states - 1; i++) { ++ ovs_list_move(&new_states[i].acked_cfgs, ++ &ofctrl_seqno_states[i].acked_cfgs); ++ } ++ ovs_list_init(&new_states[new_type].acked_cfgs); ++ ++ free(ofctrl_seqno_states); ++ ofctrl_seqno_states = new_states; ++ return new_type; ++} ++ ++/* Creates a new seqno update request for an application specific ++ * 'seqno_type'. 
++ */ ++void ++ofctrl_seqno_update_create(size_t seqno_type, uint64_t new_cfg) ++{ ++ ovs_assert(seqno_type < n_ofctrl_seqno_states); ++ ++ struct ofctrl_seqno_state *state = &ofctrl_seqno_states[seqno_type]; ++ ++ /* If new_cfg didn't change since the last request there should already ++ * be an update pending. ++ */ ++ if (new_cfg == state->req_cfg) { ++ return; ++ } ++ ++ state->req_cfg = new_cfg; ++ ofctrl_seqno_update_create__(seqno_type, new_cfg); ++} ++ ++/* Should be called when the application is certain that all OVS flow updates ++ * corresponding to 'flow_cfg' were processed. Populates the application ++ * specific lists of acked requests in 'ofctrl_seqno_states'. ++ */ ++void ++ofctrl_seqno_run(uint64_t flow_cfg) ++{ ++ struct ofctrl_seqno_update *update, *prev; ++ LIST_FOR_EACH_SAFE (update, prev, list_node, &ofctrl_seqno_updates) { ++ if (flow_cfg < update->flow_cfg) { ++ break; ++ } ++ ++ ovs_list_remove(&update->list_node); ++ ofctrl_seqno_cfg_run(update->seqno_type, update); ++ } ++} ++ ++/* Returns the seqno to be used when sending a barrier request to OVS. */ ++uint64_t ++ofctrl_seqno_get_req_cfg(void) ++{ ++ return ofctrl_req_seqno; ++} ++ ++/* Should be called whenever the openflow connection to OVS is lost. Flushes ++ * all pending 'ofctrl_seqno_updates'. 
++ */ ++void ++ofctrl_seqno_flush(void) ++{ ++ for (size_t i = 0; i < n_ofctrl_seqno_states; i++) { ++ ofctrl_seqno_update_list_destroy(&ofctrl_seqno_states[i].acked_cfgs); ++ } ++ ofctrl_seqno_update_list_destroy(&ofctrl_seqno_updates); ++ ofctrl_req_seqno = 0; ++} ++ ++static void ++ofctrl_acked_seqnos_init(struct ofctrl_acked_seqnos *seqnos, ++ uint64_t last_acked) ++{ ++ hmap_init(&seqnos->acked); ++ seqnos->last_acked = last_acked; ++} ++ ++static void ++ofctrl_acked_seqnos_add(struct ofctrl_acked_seqnos *seqnos, uint32_t val) ++{ ++ seqnos->last_acked = val; ++ ++ struct ofctrl_ack_seqno *sn = xmalloc(sizeof *sn); ++ hmap_insert(&seqnos->acked, &sn->node, hash_int(val, 0)); ++ sn->seqno = val; ++} ++ ++static void ++ofctrl_seqno_update_create__(size_t seqno_type, uint64_t req_cfg) ++{ ++ struct ofctrl_seqno_update *update = xmalloc(sizeof *update); ++ ++ ofctrl_req_seqno++; ++ ovs_list_push_back(&ofctrl_seqno_updates, &update->list_node); ++ update->seqno_type = seqno_type; ++ update->flow_cfg = ofctrl_req_seqno; ++ update->req_cfg = req_cfg; ++} ++ ++static void ++ofctrl_seqno_update_list_destroy(struct ovs_list *seqno_list) ++{ ++ struct ofctrl_seqno_update *update; ++ ++ LIST_FOR_EACH_POP (update, list_node, seqno_list) { ++ free(update); ++ } ++} ++ ++static void ++ofctrl_seqno_cfg_run(size_t seqno_type, struct ofctrl_seqno_update *update) ++{ ++ ovs_assert(seqno_type < n_ofctrl_seqno_states); ++ ovs_list_push_back(&ofctrl_seqno_states[seqno_type].acked_cfgs, ++ &update->list_node); ++ ofctrl_seqno_states[seqno_type].cur_cfg = update->req_cfg; ++} +diff --git a/controller/ofctrl-seqno.h b/controller/ofctrl-seqno.h +new file mode 100644 +index 0000000..876947c +--- /dev/null ++++ b/controller/ofctrl-seqno.h +@@ -0,0 +1,49 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#ifndef OFCTRL_SEQNO_H ++#define OFCTRL_SEQNO_H 1 ++ ++#include ++ ++#include ++ ++/* Collection of acked ofctrl_seqno_update requests and the most recent ++ * 'last_acked' value. ++ */ ++struct ofctrl_acked_seqnos { ++ struct hmap acked; ++ uint64_t last_acked; ++}; ++ ++/* Acked application specific seqno. Stored in ofctrl_acked_seqnos.acked. */ ++struct ofctrl_ack_seqno { ++ struct hmap_node node; ++ uint64_t seqno; ++}; ++ ++struct ofctrl_acked_seqnos *ofctrl_acked_seqnos_get(size_t seqno_type); ++void ofctrl_acked_seqnos_destroy(struct ofctrl_acked_seqnos *seqnos); ++bool ofctrl_acked_seqnos_contains(const struct ofctrl_acked_seqnos *seqnos, ++ uint32_t val); ++ ++void ofctrl_seqno_init(void); ++size_t ofctrl_seqno_add_type(void); ++void ofctrl_seqno_update_create(size_t seqno_type, uint64_t new_cfg); ++void ofctrl_seqno_run(uint64_t flow_cfg); ++uint64_t ofctrl_seqno_get_req_cfg(void); ++void ofctrl_seqno_flush(void); ++ ++#endif /* controller/ofctrl-seqno.h */ +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 42883b4..bb1c659 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -39,6 +39,7 @@ + #include "lib/vswitch-idl.h" + #include "lport.h" + #include "ofctrl.h" ++#include "ofctrl-seqno.h" + #include "openvswitch/vconn.h" + #include "openvswitch/vlog.h" + #include "ovn/actions.h" +@@ -98,6 +99,9 @@ struct pending_pkt { + char *flow_s; + }; + ++/* Registered ofctrl seqno type for nb_cfg propagation. 
*/ ++static size_t ofctrl_seq_type_nb_cfg; ++ + struct local_datapath * + get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key) + { +@@ -825,11 +829,14 @@ static void + store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn, + const struct sbrec_chassis_private *chassis, + const struct ovsrec_bridge *br_int, +- unsigned int delay_nb_cfg_report, +- uint64_t cur_cfg) ++ unsigned int delay_nb_cfg_report) + { ++ struct ofctrl_acked_seqnos *acked_nb_cfg_seqnos = ++ ofctrl_acked_seqnos_get(ofctrl_seq_type_nb_cfg); ++ uint64_t cur_cfg = acked_nb_cfg_seqnos->last_acked; ++ + if (!cur_cfg) { +- return; ++ goto done; + } + + if (sb_txn && chassis && cur_cfg != chassis->nb_cfg) { +@@ -850,6 +857,9 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn, + cur_cfg_str); + free(cur_cfg_str); + } ++ ++done: ++ ofctrl_acked_seqnos_destroy(acked_nb_cfg_seqnos); + } + + static const char * +@@ -967,6 +977,11 @@ en_ofctrl_is_connected_run(struct engine_node *node, void *data) + struct ed_type_ofctrl_is_connected *of_data = data; + if (of_data->connected != ofctrl_is_connected()) { + of_data->connected = !of_data->connected; ++ ++ /* Flush ofctrl seqno requests when the ofctrl connection goes down. */ ++ if (!of_data->connected) { ++ ofctrl_seqno_flush(); ++ } + engine_set_node_state(node, EN_UPDATED); + return; + } +@@ -2393,6 +2408,9 @@ main(int argc, char *argv[]) + pinctrl_init(); + lflow_init(); + ++ /* Register ofctrl seqno types. */ ++ ofctrl_seq_type_nb_cfg = ofctrl_seqno_add_type(); ++ + /* Connect to OVS OVSDB instance. 
*/ + struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( + ovsdb_idl_create(ovs_remote, &ovsrec_idl_class, false, true)); +@@ -2624,6 +2642,7 @@ main(int argc, char *argv[]) + ofctrl_init(&flow_output_data->group_table, + &flow_output_data->meter_table, + get_ofctrl_probe_interval(ovs_idl_loop.idl)); ++ ofctrl_seqno_init(); + + unixctl_command_register("group-table-list", "", 0, 0, + extend_table_list, +@@ -2853,17 +2872,23 @@ main(int argc, char *argv[]) + sb_monitor_all); + } + } ++ ++ ofctrl_seqno_update_create( ++ ofctrl_seq_type_nb_cfg, ++ get_nb_cfg(sbrec_sb_global_table_get( ++ ovnsb_idl_loop.idl), ++ ovnsb_cond_seqno, ++ ovnsb_expected_cond_seqno)); ++ + flow_output_data = engine_get_data(&en_flow_output); + if (flow_output_data && ct_zones_data) { + ofctrl_put(&flow_output_data->flow_table, + &ct_zones_data->pending, + sbrec_meter_table_get(ovnsb_idl_loop.idl), +- get_nb_cfg(sbrec_sb_global_table_get( +- ovnsb_idl_loop.idl), +- ovnsb_cond_seqno, +- ovnsb_expected_cond_seqno), ++ ofctrl_seqno_get_req_cfg(), + engine_node_changed(&en_flow_output)); + } ++ ofctrl_seqno_run(ofctrl_get_cur_cfg()); + } + + } +@@ -2889,7 +2914,7 @@ main(int argc, char *argv[]) + } + + store_nb_cfg(ovnsb_idl_txn, ovs_idl_txn, chassis_private, +- br_int, delay_nb_cfg_report, ofctrl_get_cur_cfg()); ++ br_int, delay_nb_cfg_report); + + if (pending_pkt.conn) { + struct ed_type_addr_sets *as_data = +diff --git a/controller/test-ofctrl-seqno.c b/controller/test-ofctrl-seqno.c +new file mode 100644 +index 0000000..fce88d4 +--- /dev/null ++++ b/controller/test-ofctrl-seqno.c +@@ -0,0 +1,194 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#include ++ ++#include "tests/ovstest.h" ++#include "sort.h" ++#include "util.h" ++ ++#include "ofctrl-seqno.h" ++ ++static void ++test_init(void) ++{ ++ ofctrl_seqno_init(); ++} ++ ++static bool ++test_read_uint_value(struct ovs_cmdl_context *ctx, unsigned int index, ++ const char *descr, unsigned int *result) ++{ ++ if (index >= ctx->argc) { ++ fprintf(stderr, "Missing %s argument\n", descr); ++ return false; ++ } ++ ++ const char *arg = ctx->argv[index]; ++ if (!str_to_uint(arg, 10, result)) { ++ fprintf(stderr, "Invalid %s: %s\n", descr, arg); ++ return false; ++ } ++ return true; ++} ++ ++static int ++test_seqno_compare(size_t a, size_t b, void *values_) ++{ ++ uint64_t *values = values_; ++ ++ return values[a] == values[b] ? 0 : (values[a] < values[b] ? -1 : 1); ++} ++ ++static void ++test_seqno_swap(size_t a, size_t b, void *values_) ++{ ++ uint64_t *values = values_; ++ uint64_t tmp = values[a]; ++ ++ values[a] = values[b]; ++ values[b] = tmp; ++} ++ ++static void ++test_dump_acked_seqnos(size_t seqno_type) ++{ ++ struct ofctrl_acked_seqnos * acked_seqnos = ++ ofctrl_acked_seqnos_get(seqno_type); ++ ++ printf("ofctrl-seqno-type: %"PRIuSIZE"\n", seqno_type); ++ printf(" last-acked %"PRIu64"\n", acked_seqnos->last_acked); ++ ++ size_t n_acked = hmap_count(&acked_seqnos->acked); ++ uint64_t *acked = xmalloc(n_acked * sizeof *acked); ++ struct ofctrl_ack_seqno *ack_seqno; ++ size_t i = 0; ++ ++ /* A bit hacky but ignoring overflows the "total of all seqno + 1" should ++ * be a number that is not part of the acked seqnos. 
++ */ ++ uint64_t total_seqno = 1; ++ HMAP_FOR_EACH (ack_seqno, node, &acked_seqnos->acked) { ++ ovs_assert(ofctrl_acked_seqnos_contains(acked_seqnos, ++ ack_seqno->seqno)); ++ total_seqno += ack_seqno->seqno; ++ acked[i++] = ack_seqno->seqno; ++ } ++ ovs_assert(!ofctrl_acked_seqnos_contains(acked_seqnos, total_seqno)); ++ ++ sort(n_acked, test_seqno_compare, test_seqno_swap, acked); ++ ++ for (i = 0; i < n_acked; i++) { ++ printf(" %"PRIu64"\n", acked[i]); ++ } ++ ++ free(acked); ++ ofctrl_acked_seqnos_destroy(acked_seqnos); ++} ++ ++static void ++test_ofctrl_seqno_add_type(struct ovs_cmdl_context *ctx) ++{ ++ unsigned int n_types; ++ ++ test_init(); ++ ++ if (!test_read_uint_value(ctx, 1, "n_types", &n_types)) { ++ return; ++ } ++ for (unsigned int i = 0; i < n_types; i++) { ++ printf("%"PRIuSIZE"\n", ofctrl_seqno_add_type()); ++ } ++} ++ ++static void ++test_ofctrl_seqno_ack_seqnos(struct ovs_cmdl_context *ctx) ++{ ++ unsigned int n_reqs = 0; ++ unsigned int shift = 2; ++ unsigned int n_types; ++ unsigned int n_acks; ++ ++ test_init(); ++ bool batch_acks = !strcmp(ctx->argv[1], "true"); ++ ++ if (!test_read_uint_value(ctx, shift++, "n_types", &n_types)) { ++ return; ++ } ++ ++ for (unsigned int i = 0; i < n_types; i++) { ++ ovs_assert(ofctrl_seqno_add_type() == i); ++ ++ /* Read number of app specific seqnos. 
*/ ++ unsigned int n_app_seqnos; ++ ++ if (!test_read_uint_value(ctx, shift++, "n_app_seqnos", ++ &n_app_seqnos)) { ++ return; ++ } ++ ++ for (unsigned int j = 0; j < n_app_seqnos; j++, n_reqs++) { ++ unsigned int app_seqno; ++ ++ if (!test_read_uint_value(ctx, shift++, "app_seqno", &app_seqno)) { ++ return; ++ } ++ ofctrl_seqno_update_create(i, app_seqno); ++ } ++ } ++ printf("ofctrl-seqno-req-cfg: %u\n", n_reqs); ++ ++ if (!test_read_uint_value(ctx, shift++, "n_acks", &n_acks)) { ++ return; ++ } ++ for (unsigned int i = 0; i < n_acks; i++) { ++ unsigned int ack_seqno; ++ ++ if (!test_read_uint_value(ctx, shift++, "ack_seqno", &ack_seqno)) { ++ return; ++ } ++ ofctrl_seqno_run(ack_seqno); ++ ++ if (!batch_acks) { ++ for (unsigned int st = 0; st < n_types; st++) { ++ test_dump_acked_seqnos(st); ++ } ++ } ++ } ++ if (batch_acks) { ++ for (unsigned int st = 0; st < n_types; st++) { ++ test_dump_acked_seqnos(st); ++ } ++ } ++} ++ ++static void ++test_ofctrl_seqno_main(int argc, char *argv[]) ++{ ++ set_program_name(argv[0]); ++ static const struct ovs_cmdl_command commands[] = { ++ {"ofctrl_seqno_add_type", NULL, 1, 1, ++ test_ofctrl_seqno_add_type, OVS_RO}, ++ {"ofctrl_seqno_ack_seqnos", NULL, 2, INT_MAX, ++ test_ofctrl_seqno_ack_seqnos, OVS_RO}, ++ {NULL, NULL, 0, 0, NULL, OVS_RO}, ++ }; ++ struct ovs_cmdl_context ctx; ++ ctx.argc = argc - 1; ++ ctx.argv = argv + 1; ++ ovs_cmdl_run_command(&ctx, commands); ++} ++ ++OVSTEST_REGISTER("test-ofctrl-seqno", test_ofctrl_seqno_main); +diff --git a/tests/automake.mk b/tests/automake.mk +index c5c286e..c09f615 100644 +--- a/tests/automake.mk ++++ b/tests/automake.mk +@@ -31,7 +31,8 @@ TESTSUITE_AT = \ + tests/ovn-controller-vtep.at \ + tests/ovn-ic.at \ + tests/ovn-macros.at \ +- tests/ovn-performance.at ++ tests/ovn-performance.at \ ++ tests/ovn-ofctrl-seqno.at + + SYSTEM_KMOD_TESTSUITE_AT = \ + tests/system-common-macros.at \ +@@ -202,7 +203,10 @@ noinst_PROGRAMS += tests/ovstest + tests_ovstest_SOURCES = \ + 
tests/ovstest.c \ + tests/ovstest.h \ +- tests/test-ovn.c ++ tests/test-ovn.c \ ++ controller/test-ofctrl-seqno.c \ ++ controller/ofctrl-seqno.c \ ++ controller/ofctrl-seqno.h + + tests_ovstest_LDADD = $(OVS_LIBDIR)/daemon.lo \ + $(OVS_LIBDIR)/libopenvswitch.la lib/libovn.la +diff --git a/tests/ovn-ofctrl-seqno.at b/tests/ovn-ofctrl-seqno.at +new file mode 100644 +index 0000000..59dfea9 +--- /dev/null ++++ b/tests/ovn-ofctrl-seqno.at +@@ -0,0 +1,226 @@ ++# ++# Unit tests for the controller/ofctrl-seqno.c module. ++# ++AT_BANNER([OVN unit tests - ofctrl-seqno]) ++ ++AT_SETUP([ovn -- unit test -- ofctrl-seqno add-type]) ++ ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_add_type 1], [0], [dnl ++0 ++]) ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_add_type 2], [0], [dnl ++0 ++1 ++]) ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_add_type 3], [0], [dnl ++0 ++1 ++2 ++]) ++AT_CLEANUP ++ ++AT_SETUP([ovn -- unit test -- ofctrl-seqno ack-seqnos]) ++ ++AS_BOX([No Ack Batching, 1 seqno type]) ++n_types=1 ++n_app_seqnos=3 ++app_seqnos="40 41 42" ++ ++n_acks=1 ++acks="1" ++echo "ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 3 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++]) ++ ++n_acks=2 ++acks="1 2" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 3 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++ofctrl-seqno-type: 0 ++ last-acked 41 ++ 41 ++]) ++ ++n_acks=3 ++acks="1 2 3" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 3 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++ofctrl-seqno-type: 0 ++ 
last-acked 41 ++ 41 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ 42 ++]) ++ ++AS_BOX([Ack Batching, 1 seqno type]) ++n_types=1 ++n_app_seqnos=3 ++app_seqnos="40 41 42" ++ ++n_acks=1 ++acks="1" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos true ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 3 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++]) ++ ++n_acks=2 ++acks="1 2" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos true ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 3 ++ofctrl-seqno-type: 0 ++ last-acked 41 ++ 40 ++ 41 ++]) ++ ++n_acks=3 ++acks="1 2 3" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos true ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos} ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 3 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ 40 ++ 41 ++ 42 ++]) ++ ++AS_BOX([No Ack Batching, 2 seqno types]) ++n_types=2 ++n_app_seqnos=3 ++app_seqnos1="40 41 42" ++app_seqnos2="50 51 52" ++ ++n_acks=1 ++acks="1" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos1} ${n_app_seqnos} ${app_seqnos2} \ ++ ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 6 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++ofctrl-seqno-type: 1 ++ last-acked 0 ++]) ++ ++n_acks=3 ++acks="1 2 3" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos1} ${n_app_seqnos} ${app_seqnos2} \ ++ ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 6 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++ofctrl-seqno-type: 1 ++ last-acked 0 ++ofctrl-seqno-type: 0 ++ last-acked 41 ++ 41 ++ofctrl-seqno-type: 1 ++ last-acked 0 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ 42 ++ofctrl-seqno-type: 1 ++ last-acked 0 ++]) ++ ++n_acks=3 ++acks="4 5 6" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos false ${n_types} \ ++ ${n_app_seqnos} 
${app_seqnos1} ${n_app_seqnos} ${app_seqnos2} \ ++ ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 6 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ 40 ++ 41 ++ 42 ++ofctrl-seqno-type: 1 ++ last-acked 50 ++ 50 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ofctrl-seqno-type: 1 ++ last-acked 51 ++ 51 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ofctrl-seqno-type: 1 ++ last-acked 52 ++ 52 ++]) ++ ++AS_BOX([Ack Batching, 2 seqno types]) ++n_types=2 ++n_app_seqnos=3 ++app_seqnos1="40 41 42" ++app_seqnos2="50 51 52" ++ ++n_acks=1 ++acks="1" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos true ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos1} ${n_app_seqnos} ${app_seqnos2} \ ++ ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 6 ++ofctrl-seqno-type: 0 ++ last-acked 40 ++ 40 ++ofctrl-seqno-type: 1 ++ last-acked 0 ++]) ++ ++n_acks=3 ++acks="1 2 3" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos true ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos1} ${n_app_seqnos} ${app_seqnos2} \ ++ ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 6 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ 40 ++ 41 ++ 42 ++ofctrl-seqno-type: 1 ++ last-acked 0 ++]) ++ ++n_acks=3 ++acks="4 5 6" ++AT_CHECK([ovstest test-ofctrl-seqno ofctrl_seqno_ack_seqnos true ${n_types} \ ++ ${n_app_seqnos} ${app_seqnos1} ${n_app_seqnos} ${app_seqnos2} \ ++ ${n_acks} ${acks}], [0], [dnl ++ofctrl-seqno-req-cfg: 6 ++ofctrl-seqno-type: 0 ++ last-acked 42 ++ 40 ++ 41 ++ 42 ++ofctrl-seqno-type: 1 ++ last-acked 52 ++ 50 ++ 51 ++ 52 ++]) ++AT_CLEANUP +diff --git a/tests/testsuite.at b/tests/testsuite.at +index 960227d..3eba785 100644 +--- a/tests/testsuite.at ++++ b/tests/testsuite.at +@@ -26,6 +26,7 @@ m4_include([tests/ovn.at]) + m4_include([tests/ovn-performance.at]) + m4_include([tests/ovn-northd.at]) + m4_include([tests/ovn-nbctl.at]) ++m4_include([tests/ovn-ofctrl-seqno.at]) + m4_include([tests/ovn-sbctl.at]) + m4_include([tests/ovn-ic-nbctl.at]) + m4_include([tests/ovn-ic-sbctl.at]) +-- +1.8.3.1 + 
diff --git a/SOURCES/0002-lflow-Use-learn-action-to-generate-LB-hairpin-reply-.patch b/SOURCES/0002-lflow-Use-learn-action-to-generate-LB-hairpin-reply-.patch new file mode 100644 index 0000000..415198d --- /dev/null +++ b/SOURCES/0002-lflow-Use-learn-action-to-generate-LB-hairpin-reply-.patch @@ -0,0 +1,1126 @@ +From 0ea619c80146bffd4fef7ac1a8a2aa07f9003eb0 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Fri, 5 Feb 2021 23:29:29 +0100 +Subject: [PATCH 2/2] lflow: Use learn() action to generate LB hairpin reply + flows. + +The main trait of load balancer hairpin traffic is that it never leaves +the local hypervisor. Essentially this means that only hairpin +openflows installed for logical switches that have at least one logical +switch port bound locally can ever be hit. + +Until now, if a load balancer was applied on multiple logical switches +that are connected through a distributed router, ovn-controller would +install flows to detect hairpin replies for all logical switches. In +practice this leads to a very high number of openflows out of which +most will never be used. + +Instead we now use an additional action, learn(), on flows that match on +packets that create the hairpin session. The learn() action will then +generate the necessary flows to handle hairpin replies, but only for +the local datapaths which actually generate hairpin traffic. + +For example, simulating how ovn-k8s uses load balancer for services, +in a "switch per node" scenario, the script below would generate +10K (n_nodes * n_vips * n_backends) openflows on every node in table=69 +(hairpin reply). With this patch the maximum number of openflows that +can be created for hairpin replies is 200 (n_vips * n_backends). + +In general, for deployments that leverage switch-per-node topologies, +the number of openflows is reduced by a factor of N, where N is the +number of nodes. 
+ + $ cat lbs.sh + NODES=50 + VIPS=20 + BACKENDS=10 + ovn-nbctl lr-add rtr + for ((i = 1; i <= $NODES; i++)); do + ovn-nbctl \ + -- ls-add ls$i \ + -- lsp-add ls$i vm$i \ + -- lsp-add ls$i ls$i-rtr \ + -- lsp-set-type ls$i-rtr router \ + -- lsp-set-options ls$i-rtr router-port=rtr-ls$i \ + -- lrp-add rtr rtr-ls$i 00:00:00:00:01:00 42.42.42.$i/24 + done + + for ((i = 1; i <= $VIPS; i++)); do + lb=lb$i + vip=10.10.10.$i:1 + bip=20.20.20.1:2 + for ((j = 2; j <= $BACKENDS; j++)); do + bip="$bip,20.20.20.$j:2" + done + ovn-nbctl lb-add $lb $vip $bip + done + + for ((i = 1; i <= $NODES; i++)); do + for ((j = 1; j <= $VIPS; j++)); do + ovn-nbctl ls-lb-add ls$i lb$j + done + done + + ovs-vsctl add-port br-int vm1 \ + -- set interface vm1 type=internal \ + -- set interface vm1 external-ids:iface-id=vm1 + +Suggested-by: Ilya Maximets +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit 022ea339c8e22824ba6f6f1257da0d1b6c66d401) + +Change-Id: Ic0c20047538d6881e8cb79e00c96da8afde67a72 +--- + controller/lflow.c | 204 ++++++++++++++++--- + tests/ofproto-macros.at | 5 +- + tests/ovn.at | 516 +++++++++++++++++++++++++----------------------- + 3 files changed, 455 insertions(+), 270 deletions(-) + +diff --git a/controller/lflow.c b/controller/lflow.c +index 946c1e0..2b7d356 100644 +--- a/controller/lflow.c ++++ b/controller/lflow.c +@@ -1171,6 +1171,178 @@ add_neighbor_flows(struct ovsdb_idl_index *sbrec_port_binding_by_name, + } + } + ++/* Builds the "learn()" action to be triggered by packets initiating a ++ * hairpin session. 
++ * ++ * This will generate flows in table OFTABLE_CHK_LB_HAIRPIN_REPLY of the form: ++ * - match: ++ * metadata=<orig-pkt-metadata>,ip/ipv6,ip.src=<backend>,ip.dst=<vip> ++ * nw_proto='lb_proto',tp_src_port=<backend-port> ++ * - action: ++ * set MLF_LOOKUP_LB_HAIRPIN_BIT=1 ++ */ ++static void ++add_lb_vip_hairpin_reply_action(struct in6_addr *vip6, ovs_be32 vip, ++ uint8_t lb_proto, bool has_l4_port, ++ uint64_t cookie, struct ofpbuf *ofpacts) ++{ ++ struct match match = MATCH_CATCHALL_INITIALIZER; ++ struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); ++ struct ofpact_learn_spec *ol_spec; ++ unsigned int imm_bytes; ++ uint8_t *src_imm; ++ ++ /* Once learned, hairpin reply flows are permanent until the VIP/backend ++ * is removed. ++ */ ++ ol->flags = NX_LEARN_F_DELETE_LEARNED; ++ ol->idle_timeout = OFP_FLOW_PERMANENT; ++ ol->hard_timeout = OFP_FLOW_PERMANENT; ++ ol->priority = OFP_DEFAULT_PRIORITY; ++ ol->table_id = OFTABLE_CHK_LB_HAIRPIN_REPLY; ++ ol->cookie = htonll(cookie); ++ ++ /* Match on metadata of the packet that created the hairpin session. */ ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ ++ ol_spec->dst.field = mf_from_id(MFF_METADATA); ++ ol_spec->dst.ofs = 0; ++ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_MATCH; ++ ol_spec->src_type = NX_LEARN_SRC_FIELD; ++ ol_spec->src.field = mf_from_id(MFF_METADATA); ++ ++ /* Match on the same ETH type as the packet that created the hairpin ++ * session. ++ */ ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE); ++ ol_spec->dst.ofs = 0; ++ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_MATCH; ++ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; ++ union mf_value imm_eth_type = { ++ .be16 = !vip6 ? 
htons(ETH_TYPE_IP) : htons(ETH_TYPE_IPV6) ++ }; ++ mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match); ++ ++ /* Push value last, as this may reallocate 'ol_spec'. */ ++ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); ++ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); ++ memcpy(src_imm, &imm_eth_type, imm_bytes); ++ ++ /* Hairpin replies have ip.src == <backend-ip>. */ ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ if (!vip6) { ++ ol_spec->dst.field = mf_from_id(MFF_IPV4_SRC); ++ ol_spec->src.field = mf_from_id(MFF_IPV4_SRC); ++ } else { ++ ol_spec->dst.field = mf_from_id(MFF_IPV6_SRC); ++ ol_spec->src.field = mf_from_id(MFF_IPV6_SRC); ++ } ++ ol_spec->dst.ofs = 0; ++ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_MATCH; ++ ol_spec->src_type = NX_LEARN_SRC_FIELD; ++ ++ /* Hairpin replies have ip.dst == <vip>. */ ++ union mf_value imm_ip; ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ if (!vip6) { ++ ol_spec->dst.field = mf_from_id(MFF_IPV4_DST); ++ imm_ip = (union mf_value) { ++ .be32 = vip ++ }; ++ } else { ++ ol_spec->dst.field = mf_from_id(MFF_IPV6_DST); ++ imm_ip = (union mf_value) { ++ .ipv6 = *vip6 ++ }; ++ } ++ ol_spec->dst.ofs = 0; ++ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_MATCH; ++ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; ++ mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match); ++ ++ /* Push value last, as this may reallocate 'ol_spec' */ ++ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); ++ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); ++ memcpy(src_imm, &imm_ip, imm_bytes); ++ ++ /* Hairpin replies have the same nw_proto as packets that created the ++ * session. 
++ */ ++ union mf_value imm_proto = { ++ .u8 = lb_proto, ++ }; ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ ol_spec->dst.field = mf_from_id(MFF_IP_PROTO); ++ ol_spec->src.field = mf_from_id(MFF_IP_PROTO); ++ ol_spec->dst.ofs = 0; ++ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_MATCH; ++ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; ++ mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match); ++ ++ /* Push value last, as this may reallocate 'ol_spec' */ ++ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); ++ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); ++ memcpy(src_imm, &imm_proto, imm_bytes); ++ ++ /* Hairpin replies have source port == <backend-port>. */ ++ if (has_l4_port) { ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ switch (lb_proto) { ++ case IPPROTO_TCP: ++ ol_spec->dst.field = mf_from_id(MFF_TCP_SRC); ++ ol_spec->src.field = mf_from_id(MFF_TCP_DST); ++ break; ++ case IPPROTO_UDP: ++ ol_spec->dst.field = mf_from_id(MFF_UDP_SRC); ++ ol_spec->src.field = mf_from_id(MFF_UDP_DST); ++ break; ++ case IPPROTO_SCTP: ++ ol_spec->dst.field = mf_from_id(MFF_SCTP_SRC); ++ ol_spec->src.field = mf_from_id(MFF_SCTP_DST); ++ break; ++ default: ++ OVS_NOT_REACHED(); ++ break; ++ } ++ ol_spec->dst.ofs = 0; ++ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_MATCH; ++ ol_spec->src_type = NX_LEARN_SRC_FIELD; ++ } ++ ++ /* Set MLF_LOOKUP_LB_HAIRPIN_BIT for hairpin replies. 
*/ ++ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); ++ ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS); ++ ol_spec->dst.ofs = MLF_LOOKUP_LB_HAIRPIN_BIT; ++ ol_spec->dst.n_bits = 1; ++ ol_spec->n_bits = ol_spec->dst.n_bits; ++ ol_spec->dst_type = NX_LEARN_DST_LOAD; ++ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; ++ union mf_value imm_reg_value = { ++ .u8 = 1 ++ }; ++ mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match); ++ ++ /* Push value last, as this may reallocate 'ol_spec' */ ++ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); ++ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); ++ memcpy(src_imm, &imm_reg_value, imm_bytes); ++ ++ ofpact_finish_LEARN(ofpacts, &ol); ++} ++ + static void + add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + struct ovn_lb_vip *lb_vip, +@@ -1180,14 +1352,12 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + { + uint64_t stub[1024 / 8]; + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(stub); ++ struct match hairpin_match = MATCH_CATCHALL_INITIALIZER; + + uint8_t value = 1; + put_load(&value, sizeof value, MFF_LOG_FLAGS, + MLF_LOOKUP_LB_HAIRPIN_BIT, 1, &ofpacts); + +- struct match hairpin_match = MATCH_CATCHALL_INITIALIZER; +- struct match hairpin_reply_match = MATCH_CATCHALL_INITIALIZER; +- + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { + ovs_be32 bip4 = in6_addr_get_mapped_ipv4(&lb_backend->ip); + ovs_be32 vip4 = lb->hairpin_snat_ips.n_ipv4_addrs +@@ -1198,9 +1368,10 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + match_set_nw_src(&hairpin_match, bip4); + match_set_nw_dst(&hairpin_match, bip4); + +- match_set_dl_type(&hairpin_reply_match, htons(ETH_TYPE_IP)); +- match_set_nw_src(&hairpin_reply_match, bip4); +- match_set_nw_dst(&hairpin_reply_match, vip4); ++ add_lb_vip_hairpin_reply_action(NULL, vip4, lb_proto, ++ lb_backend->port, ++ lb->slb->header_.uuid.parts[0], ++ &ofpacts); + } else { + struct in6_addr *bip6 = &lb_backend->ip; + struct in6_addr *vip6 = 
lb->hairpin_snat_ips.n_ipv6_addrs +@@ -1210,17 +1381,15 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + match_set_ipv6_src(&hairpin_match, bip6); + match_set_ipv6_dst(&hairpin_match, bip6); + +- match_set_dl_type(&hairpin_reply_match, htons(ETH_TYPE_IPV6)); +- match_set_ipv6_src(&hairpin_reply_match, bip6); +- match_set_ipv6_dst(&hairpin_reply_match, vip6); ++ add_lb_vip_hairpin_reply_action(vip6, 0, lb_proto, ++ lb_backend->port, ++ lb->slb->header_.uuid.parts[0], ++ &ofpacts); + } + + if (lb_backend->port) { + match_set_nw_proto(&hairpin_match, lb_proto); + match_set_tp_dst(&hairpin_match, htons(lb_backend->port)); +- +- match_set_nw_proto(&hairpin_reply_match, lb_proto); +- match_set_tp_src(&hairpin_reply_match, htons(lb_backend->port)); + } + + /* In the original direction, only match on traffic that was already +@@ -1241,17 +1410,6 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb, + ofctrl_add_flow(flow_table, OFTABLE_CHK_LB_HAIRPIN, 100, + lb->slb->header_.uuid.parts[0], &hairpin_match, + &ofpacts, &lb->slb->header_.uuid); +- +- for (size_t i = 0; i < lb->slb->n_datapaths; i++) { +- match_set_metadata(&hairpin_reply_match, +- htonll(lb->slb->datapaths[i]->tunnel_key)); +- +- ofctrl_add_flow(flow_table, OFTABLE_CHK_LB_HAIRPIN_REPLY, 100, +- lb->slb->header_.uuid.parts[0], +- &hairpin_reply_match, +- &ofpacts, &lb->slb->header_.uuid); +- } +- + ofpbuf_uninit(&ofpacts); + } + +diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at +index dd5d384..ff65d60 100644 +--- a/tests/ofproto-macros.at ++++ b/tests/ofproto-macros.at +@@ -12,7 +12,10 @@ strip_n_bytes () { + + # Strips 'cookie=...' from ovs-ofctl output. 
+ strip_cookie () { +- sed 's/ cookie=0x[0-9a-fA-F]*,//' ++ sed ' ++s/ cookie=0x[0-9a-fA-F]*,// ++s/cookie=0x[0-9a-fA-F]*,// ++' + } + + # Strips out uninteresting parts of ovs-ofctl output, as well as parts +diff --git a/tests/ovn.at b/tests/ovn.at +index 14072ec..6c9bda0 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -20822,6 +20822,8 @@ ovn-sbctl dump-flows > sbflows + AT_CAPTURE_FILE([sbflows]) + > expected + ++AS_BOX([IPv4 TCP Hairpin]) ++ + # Inject IPv4 TCP packet from lsp. + tcp_payload=$(build_tcp_syn 84d0 1f90 05a7) + hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 156e) +@@ -20835,9 +20837,15 @@ send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ + # Change LB Hairpin SNAT IP. + # Also flush conntrack to avoid reusing an existing entry. + as hv1 ovs-appctl dpctl/flush-conntrack ++ + ovn-nbctl --wait=hv set load_balancer lb-ipv4-tcp options:hairpin_snat_ip="88.88.88.87" + # Inject IPv4 TCP packet from lsp. + hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 156f) +@@ -20851,6 +20859,13 @@ send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++AS_BOX([IPv4 UDP Hairpin]) ++ + # Inject IPv4 UDP packet from lsp. 
+ udp_payload=$(build_udp 84d0 0fc8 6666) + hp_udp_payload=$(build_udp 84d0 07e5 6e49) +@@ -20864,6 +20879,12 @@ send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ + # Change LB Hairpin SNAT IP. + # Also flush conntrack to avoid reusing an existing entry. + as hv1 ovs-appctl dpctl/flush-conntrack +@@ -20880,6 +20901,14 @@ send_ipv4_pkt hv1 hv1-vif1 000000000001 000000000100 \ + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++AS_BOX([IPv6 TCP Hairpin]) ++ + # Inject IPv6 TCP packet from lsp. + tcp_payload=$(build_tcp_syn 84d0 1f90 3ff9) + hp_tcp_payload=$(build_tcp_syn 84d0 0fc9 4fc0) +@@ -20893,6 +20922,13 @@ send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. 
++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ + # Change LB Hairpin SNAT IP. + # Also flush conntrack to avoid reusing an existing entry. + as hv1 ovs-appctl dpctl/flush-conntrack +@@ -20910,6 +20946,15 @@ send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + # Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++AS_BOX([IPv6 UDP Hairpin]) ++ + # Inject IPv6 UDP packet from lsp. + udp_payload=$(build_udp 84d0 0fc8 a0b8) + hp_udp_payload=$(build_udp 84d0 07e5 a89b) +@@ -20920,9 +20965,17 @@ send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + 88000000000000000000000000000088 ${hp_udp_payload} \ + expected + +-Check that traffic is hairpinned. ++# Check that traffic is hairpinned. + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) + ++# Check learned hairpin reply flows. 
++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ + # Change LB Hairpin SNAT IP. + # Also flush conntrack to avoid reusing an existing entry. + as hv1 ovs-appctl dpctl/flush-conntrack +@@ -20937,6 +20990,41 @@ send_ipv6_pkt hv1 hv1-vif1 000000000001 000000000100 \ + 88000000000000000000000000000087 ${hp_udp_payload} \ + expected + ++# Check learned hairpin reply flows. ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++AS_BOX([Delete VIP]) ++check ovn-nbctl --wait=hv set Load_Balancer lb-ipv4-tcp vips='"88.88.88.88:8080"=""' ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, 
udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++AS_BOX([Delete LB]) ++check ovn-nbctl --wait=hv \ ++ -- lb-del lb-ipv4-tcp \ ++ -- lb-del lb-ipv4-udp ++ ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++ table=69, udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++check ovn-nbctl --wait=hv lb-del lb-ipv6-tcp ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [0], [dnl ++ table=69, udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::87,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++]) ++ ++check ovn-nbctl --wait=hv lb-del lb-ipv6-udp ++OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=69 | ofctl_strip_all | grep -v NXST], [1], [dnl ++]) ++ + OVN_CLEANUP([hv1]) + AT_CLEANUP + +@@ -23188,93 +23276,79 @@ check ovn-nbctl lb-add lb-ipv4-udp 88.88.88.88:4040 42.42.42.1:2021 udp + check ovn-nbctl lb-add lb-ipv6-tcp [[8800::0088]]:8080 [[4200::1]]:4041 tcp + check ovn-nbctl --wait=hv lb-add lb-ipv6-udp [[8800::0088]]:4040 [[4200::1]]:2021 udp + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 
ovs-ofctl dump-flows br-int table=68 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST], [1], [dnl + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv4-tcp + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 1] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 1] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69], [0], [dnl ++NXST_FLOW reply (xid=0x8): + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | 
ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST], [1], [dnl + ]) + + check ovn-nbctl lb-add lb-ipv4-tcp 88.88.88.90:8080 42.42.42.42:4041,52.52.52.52:4042 tcp + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 3] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 3] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, 
priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69], [0], [dnl ++NXST_FLOW reply (xid=0x8): + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, 
priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70], [0], [dnl +-NXST_FLOW reply (xid=0x8): ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST], [1], [dnl + ]) + + check ovn-nbctl lsp-add sw0 sw0-p2 +@@ -23282,192 +23356,159 @@ check ovn-nbctl lsp-add sw0 sw0-p2 + OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xup]) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 3] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 3] + ) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, 
priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8-], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, 
priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv4-udp + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 4] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 4] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 4] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 4] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, 
priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | 
sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, 
priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv6-tcp + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 5] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 5] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 5] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 5] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] 
+-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | 
grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) 
+-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw0 lb-ipv6-udp + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 6] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 6] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 6] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 6] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 
actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, 
priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) 
+-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 
actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl + ]) + +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) 
+-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + check ovn-nbctl --wait=hv ls-lb-add sw1 lb-ipv6-udp +@@ -23475,67 +23516,53 @@ check ovn-nbctl --wait=hv ls-lb-add sw1 lb-ipv6-udp + # Number of hairpin flows shouldn't change as it doesn't depend on how many + # datapaths the LB is applied. 
+ OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 6] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 6] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 6] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 6] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-]) +- +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 
actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x2,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-]) +- +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) +-]) +- +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] 
+-priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-]) +- +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=42.42.42.42,nw_dst=88.88.88.90,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp,metadata=0x1,nw_src=52.52.52.52,nw_dst=88.88.88.90,tp_src=4042 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x2,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-]) +- +-AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 
actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++]) ++ ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl ++]) ++ ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) ++]) ++ ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, 
priority=100,ct_label=0x2/0x2,tcp,nw_src=42.42.42.42,nw_dst=42.42.42.42,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp,nw_src=52.52.52.52,nw_dst=52.52.52.52,tp_dst=4042 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.90,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++]) ++ ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST], [1], [dnl ++]) ++ ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, 
priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.90,ct_nw_proto=6,ct_tp_dst=8080,tcp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.90)) + ]) + + as hv2 ovs-vsctl del-port hv2-vif1 +@@ -23545,76 +23572,73 @@ OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xdown]) + as hv2 ovn-appctl -t ovn-controller recompute + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=69 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=70 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 6] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 6] + ) + + check ovn-nbctl --wait=hv lb-del lb-ipv4-tcp + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 3] ++ [test $(as 
hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 3] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 0] + ) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=68, priority=100,ct_label=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) ++ table=68, priority=100,ct_label=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]]) + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,tcp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=4041 
actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp,metadata=0x1,nw_src=42.42.42.1,nw_dst=88.88.88.88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x1,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] +-priority=100,udp6,metadata=0x2,ipv6_src=4200::1,ipv6_dst=8800::88,tp_src=2021 actions=load:0x1->NXM_NX_REG10[[7]] ++AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=69], [0], [dnl ++NXST_FLOW reply (xid=0x8): + ]) + +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | cut -d ' ' -f8- | sort], [0], [dnl +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) +-priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -v NXST | sort], [0], [dnl ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=17,ct_tp_dst=4040,udp6,metadata=0x2 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, priority=100,ct_state=+trk+dnat,ct_ipv6_dst=8800::88,ct_nw_proto=6,ct_tp_dst=8080,tcp6,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=8800::88)) ++ table=70, 
priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp,metadata=0x1 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88)) + ]) + + check ovn-nbctl --wait=hv ls-del sw0 + check ovn-nbctl --wait=hv ls-del sw1 + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=69 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv1 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv1 ovs-ofctl dump-flows br-int table=70 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=68 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=69 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=69 | grep -c -v NXST) -eq 0] + ) + + OVS_WAIT_UNTIL( +- [test $(as hv2 ovs-ofctl dump-flows br-int table=70 | grep -v NXST | wc -l) -eq 0] ++ [test $(as hv2 ovs-ofctl dump-flows br-int table=70 | grep -c -v NXST) -eq 0] + ) + + OVN_CLEANUP([hv1], [hv2]) +-- +1.8.3.1 + diff --git a/SOURCES/0002-ovn-nbctl-add-ecmp-ecmp-symmetric-reply-to-lr-route-.patch b/SOURCES/0002-ovn-nbctl-add-ecmp-ecmp-symmetric-reply-to-lr-route-.patch new file mode 100644 index 0000000..b51020c --- /dev/null +++ b/SOURCES/0002-ovn-nbctl-add-ecmp-ecmp-symmetric-reply-to-lr-route-.patch @@ -0,0 +1,212 @@ +From 5e1bb597df512510dc82ce47f9b65a02e2fb5c0b Mon Sep 17 00:00:00 2001 +Message-Id: <5e1bb597df512510dc82ce47f9b65a02e2fb5c0b.1611833004.git.lorenzo.bianconi@redhat.com> +In-Reply-To: 
<8770192b3b4732e02679f723ea5903a515c6bd8a.1611833004.git.lorenzo.bianconi@redhat.com> +References: <8770192b3b4732e02679f723ea5903a515c6bd8a.1611833004.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Mon, 25 Jan 2021 14:51:13 +0100 +Subject: [PATCH 2/2] ovn-nbctl: add ecmp/ecmp-symmetric-reply to lr-route-list + command + +Explicitly add ecmp/ecmp-symmetric-reply info to ovn-nbctl +lr-route-list command + +https://bugzilla.redhat.com/show_bug.cgi?id=1915958 + +Acked-by: Mark Michelson +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + tests/ovn-nbctl.at | 36 ++++++++++++++--------- + utilities/ovn-nbctl.c | 67 +++++++++++++++++++++++++++++++++++-------- + 2 files changed, 78 insertions(+), 25 deletions(-) + +--- a/tests/ovn-nbctl.at ++++ b/tests/ovn-nbctl.at +@@ -1544,27 +1544,27 @@ AT_CHECK([ovn-nbctl --ecmp lr-route-add + AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 10.0.0.0/24 11.0.0.3 lp0]) + AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl + IPv4 Routes +- 10.0.0.0/24 11.0.0.1 dst-ip +- 10.0.0.0/24 11.0.0.2 dst-ip +- 10.0.0.0/24 11.0.0.2 dst-ip +- 10.0.0.0/24 11.0.0.3 dst-ip +- 10.0.0.0/24 11.0.0.3 dst-ip lp0 ++ 10.0.0.0/24 11.0.0.1 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.2 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.2 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.3 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.3 dst-ip lp0 ecmp + ]) + + dnl Delete ecmp routes + AT_CHECK([ovn-nbctl lr-route-del lr0 10.0.0.0/24 11.0.0.1]) + AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl + IPv4 Routes +- 10.0.0.0/24 11.0.0.2 dst-ip +- 10.0.0.0/24 11.0.0.2 dst-ip +- 10.0.0.0/24 11.0.0.3 dst-ip +- 10.0.0.0/24 11.0.0.3 dst-ip lp0 ++ 10.0.0.0/24 11.0.0.2 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.2 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.3 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.3 dst-ip lp0 ecmp + ]) + AT_CHECK([ovn-nbctl lr-route-del lr0 10.0.0.0/24 11.0.0.2]) + AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl + IPv4 Routes +- 10.0.0.0/24 11.0.0.3 dst-ip +- 10.0.0.0/24 11.0.0.3 dst-ip lp0 ++ 
10.0.0.0/24 11.0.0.3 dst-ip ecmp ++ 10.0.0.0/24 11.0.0.3 dst-ip lp0 ecmp + ]) + AT_CHECK([ovn-nbctl lr-route-del lr0 10.0.0.0/24 11.0.0.3 lp0]) + AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl +@@ -1605,7 +1605,12 @@ AT_CHECK([ovn-nbctl lr-route-add lr0 10. + AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.0.1/24 11.0.0.1]) + AT_CHECK([ovn-nbctl lr-route-add lr0 0:0:0:0:0:0:0:0/0 2001:0db8:0:f101::1]) + AT_CHECK([ovn-nbctl lr-route-add lr0 2001:0db8:0::/64 2001:0db8:0:f102::1 lp0]) +-AT_CHECK([ovn-nbctl lr-route-add lr0 2001:0db8:1::/64 2001:0db8:0:f103::1]) ++AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 2001:0db8:1::/64 2001:0db8:0:f103::1]) ++AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 2001:0db8:1::/64 2001:0db8:0:f103::2]) ++AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 2001:0db8:1::/64 2001:0db8:0:f103::3]) ++AT_CHECK([ovn-nbctl --ecmp lr-route-add lr0 2001:0db8:1::/64 2001:0db8:0:f103::4]) ++AT_CHECK([ovn-nbctl lr-route-add lr0 2002:0db8:1::/64 2001:0db8:0:f103::5]) ++AT_CHECK([ovn-nbctl --ecmp-symmetric-reply lr-route-add lr0 2003:0db8:1::/64 2001:0db8:0:f103::6]) + + AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl + IPv4 Routes +@@ -1615,7 +1620,12 @@ IPv4 Routes + + IPv6 Routes + 2001:db8::/64 2001:db8:0:f102::1 dst-ip lp0 +- 2001:db8:1::/64 2001:db8:0:f103::1 dst-ip ++ 2001:db8:1::/64 2001:db8:0:f103::1 dst-ip ecmp ++ 2001:db8:1::/64 2001:db8:0:f103::2 dst-ip ecmp ++ 2001:db8:1::/64 2001:db8:0:f103::3 dst-ip ecmp ++ 2001:db8:1::/64 2001:db8:0:f103::4 dst-ip ecmp ++ 2002:db8:1::/64 2001:db8:0:f103::5 dst-ip ++ 2003:db8:1::/64 2001:db8:0:f103::6 dst-ip ecmp-symmetric-reply + ::/0 2001:db8:0:f101::1 dst-ip + ]) + +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -5443,16 +5443,26 @@ struct ipv4_route { + }; + + static int ++__ipv4_route_cmp(const struct ipv4_route *r1, const struct ipv4_route *r2) ++{ ++ if (r1->priority != r2->priority) { ++ return r1->priority > r2->priority ? 
-1 : 1; ++ } ++ if (r1->addr != r2->addr) { ++ return ntohl(r1->addr) < ntohl(r2->addr) ? -1 : 1; ++ } ++ return 0; ++} ++ ++static int + ipv4_route_cmp(const void *route1_, const void *route2_) + { + const struct ipv4_route *route1p = route1_; + const struct ipv4_route *route2p = route2_; + +- if (route1p->priority != route2p->priority) { +- return route1p->priority > route2p->priority ? -1 : 1; +- } +- if (route1p->addr != route2p->addr) { +- return ntohl(route1p->addr) < ntohl(route2p->addr) ? -1 : 1; ++ int ret = __ipv4_route_cmp(route1p, route2p); ++ if (ret) { ++ return ret; + } + return route_cmp_details(route1p->route, route2p->route); + } +@@ -5464,15 +5474,21 @@ struct ipv6_route { + }; + + static int ++__ipv6_route_cmp(const struct ipv6_route *r1, const struct ipv6_route *r2) ++{ ++ if (r1->priority != r2->priority) { ++ return r1->priority > r2->priority ? -1 : 1; ++ } ++ return memcmp(&r1->addr, &r2->addr, sizeof(r1->addr)); ++} ++ ++static int + ipv6_route_cmp(const void *route1_, const void *route2_) + { + const struct ipv6_route *route1p = route1_; + const struct ipv6_route *route2p = route2_; + +- if (route1p->priority != route2p->priority) { +- return route1p->priority > route2p->priority ? 
-1 : 1; +- } +- int ret = memcmp(&route1p->addr, &route2p->addr, sizeof(route1p->addr)); ++ int ret = __ipv6_route_cmp(route1p, route2p); + if (ret) { + return ret; + } +@@ -5480,7 +5496,8 @@ ipv6_route_cmp(const void *route1_, cons + } + + static void +-print_route(const struct nbrec_logical_router_static_route *route, struct ds *s) ++print_route(const struct nbrec_logical_router_static_route *route, ++ struct ds *s, bool ecmp) + { + + char *prefix = normalize_prefix_str(route->ip_prefix); +@@ -5503,6 +5520,14 @@ print_route(const struct nbrec_logical_r + ds_put_format(s, " (learned)"); + } + ++ if (ecmp) { ++ ds_put_cstr(s, " ecmp"); ++ } ++ ++ if (smap_get_bool(&route->options, "ecmp_symmetric_reply", false)) { ++ ds_put_cstr(s, " ecmp-symmetric-reply"); ++ } ++ + if (route->bfd) { + ds_put_cstr(s, " bfd"); + } +@@ -5572,7 +5597,16 @@ nbctl_lr_route_list(struct ctl_context * + ds_put_cstr(&ctx->output, "IPv4 Routes\n"); + } + for (int i = 0; i < n_ipv4_routes; i++) { +- print_route(ipv4_routes[i].route, &ctx->output); ++ bool ecmp = false; ++ if (i < n_ipv4_routes - 1 && ++ !__ipv4_route_cmp(&ipv4_routes[i], &ipv4_routes[i + 1])) { ++ ecmp = true; ++ } else if (i > 0 && ++ !__ipv4_route_cmp(&ipv4_routes[i], ++ &ipv4_routes[i - 1])) { ++ ecmp = true; ++ } ++ print_route(ipv4_routes[i].route, &ctx->output, ecmp); + } + + if (n_ipv6_routes) { +@@ -5580,7 +5614,16 @@ nbctl_lr_route_list(struct ctl_context * + n_ipv4_routes ? 
"\n" : ""); + } + for (int i = 0; i < n_ipv6_routes; i++) { +- print_route(ipv6_routes[i].route, &ctx->output); ++ bool ecmp = false; ++ if (i < n_ipv6_routes - 1 && ++ !__ipv6_route_cmp(&ipv6_routes[i], &ipv6_routes[i + 1])) { ++ ecmp = true; ++ } else if (i > 0 && ++ !__ipv6_route_cmp(&ipv6_routes[i], ++ &ipv6_routes[i - 1])) { ++ ecmp = true; ++ } ++ print_route(ipv6_routes[i].route, &ctx->output, ecmp); + } + + free(ipv4_routes); diff --git a/SOURCES/0002-ovn-northd-Move-DHCP-Options-and-Response-to-a-funct.patch b/SOURCES/0002-ovn-northd-Move-DHCP-Options-and-Response-to-a-funct.patch new file mode 100644 index 0000000..d4ba4c9 --- /dev/null +++ b/SOURCES/0002-ovn-northd-Move-DHCP-Options-and-Response-to-a-funct.patch @@ -0,0 +1,144 @@ +From e513bafe5718f42844f41d248ddf1777b71aaa50 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:29 +0000 +Subject: [PATCH 02/16] ovn-northd: Move DHCP Options and Response to a + function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 100 ++++++++++++++++++++++---------------------- + 1 file changed, 51 insertions(+), 49 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index d17cc55ac..a5b28584f 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6780,55 +6780,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + +- +- /* Logical switch ingress table 14 and 15: DHCP options and response +- * priority 100 flows. */ +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp) { +- continue; +- } +- +- if (!lsp_is_enabled(op->nbsp) || lsp_is_router(op->nbsp)) { +- /* Don't add the DHCP flows if the port is not enabled or if the +- * port is a router port. 
*/ +- continue; +- } +- +- if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) { +- /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport. +- */ +- continue; +- } +- +- bool is_external = lsp_is_external(op->nbsp); +- if (is_external && (!op->od->n_localnet_ports || +- !op->nbsp->ha_chassis_group)) { +- /* If it's an external port and there are no localnet ports +- * and if it doesn't belong to an HA chassis group ignore it. */ +- continue; +- } +- +- for (size_t i = 0; i < op->n_lsp_addrs; i++) { +- if (is_external) { +- for (size_t j = 0; j < op->od->n_localnet_ports; j++) { +- build_dhcpv4_options_flows( +- op, &op->lsp_addrs[i], +- op->od->localnet_ports[j]->json_key, is_external, +- lflows); +- build_dhcpv6_options_flows( +- op, &op->lsp_addrs[i], +- op->od->localnet_ports[j]->json_key, is_external, +- lflows); +- } +- } else { +- build_dhcpv4_options_flows(op, &op->lsp_addrs[i], op->json_key, +- is_external, lflows); +- build_dhcpv6_options_flows(op, &op->lsp_addrs[i], op->json_key, +- is_external, lflows); +- } +- } +- } +- + /* Logical switch ingress table 17 and 18: DNS lookup and response + * priority 100 flows. + */ +@@ -7484,6 +7435,55 @@ build_lswitch_arp_nd_service_monitor(struct ovn_northd_lb *lb, + } + + ++/* Logical switch ingress table 14 and 15: DHCP options and response ++ * priority 100 flows. */ ++static void ++build_lswitch_dhcp_options_and_response(struct ovn_port *op, ++ struct hmap *lflows) ++{ ++ if (op->nbsp) { ++ if (!lsp_is_enabled(op->nbsp) || lsp_is_router(op->nbsp)) { ++ /* Don't add the DHCP flows if the port is not enabled or if the ++ * port is a router port. */ ++ return; ++ } ++ ++ if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) { ++ /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport. 
++ */ ++ return; ++ } ++ ++ bool is_external = lsp_is_external(op->nbsp); ++ if (is_external && (!op->od->n_localnet_ports || ++ !op->nbsp->ha_chassis_group)) { ++ /* If it's an external port and there are no localnet ports ++ * and if it doesn't belong to an HA chassis group ignore it. */ ++ return; ++ } ++ ++ for (size_t i = 0; i < op->n_lsp_addrs; i++) { ++ if (is_external) { ++ for (size_t j = 0; j < op->od->n_localnet_ports; j++) { ++ build_dhcpv4_options_flows( ++ op, &op->lsp_addrs[i], ++ op->od->localnet_ports[j]->json_key, is_external, ++ lflows); ++ build_dhcpv6_options_flows( ++ op, &op->lsp_addrs[i], ++ op->od->localnet_ports[j]->json_key, is_external, ++ lflows); ++ } ++ } else { ++ build_dhcpv4_options_flows(op, &op->lsp_addrs[i], op->json_key, ++ is_external, lflows); ++ build_dhcpv6_options_flows(op, &op->lsp_addrs[i], op->json_key, ++ is_external, lflows); ++ } ++ } ++ } ++} ++ + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. + * +@@ -11371,6 +11371,8 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, + lsi->ports, + &lsi->actions, + &lsi->match); ++ build_lswitch_dhcp_options_and_response(op,lsi->lflows); ++ + /* Build Logical Router Flows. */ + build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, + &lsi->actions); +-- +2.29.2 + diff --git a/SOURCES/0003-binding-Set-Logical_Switch_Port.up-when-all-OVS-flow.patch b/SOURCES/0003-binding-Set-Logical_Switch_Port.up-when-all-OVS-flow.patch new file mode 100644 index 0000000..c66c5be --- /dev/null +++ b/SOURCES/0003-binding-Set-Logical_Switch_Port.up-when-all-OVS-flow.patch @@ -0,0 +1,483 @@ +From 3719a1add73b860c50d85fad0b270c1b69fb9147 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 13 Jan 2021 10:23:32 +0100 +Subject: [PATCH 3/3] binding: Set Logical_Switch_Port.up when all OVS flows + are installed. 
+ +Using the ofctrl-seqno generic barrier, register a new type of +notifications for Port_Bindings. This allows us to delay setting +the Logical_Switch_Port.up field until all OVS flows corresponding +to the logical port and underlying OVS interface have been installed. + +This commit also marks the OVS interface as "installed by OVN" by +setting a new "ovn-installed" external-id in the OVS Interface record +when the port is fully wired by OVN. + +Signed-off-by: Dumitru Ceara +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique +(cherry picked from upstream master commit 4d3cb42b076bb58fd8f01ab8ad146ffd539f2152) + +Change-Id: Id8e4fc3b28110cab2571d4ff8bc5ef81ae9b88c4 +--- + NEWS | 6 ++ + controller/binding.c | 181 ++++++++++++++++++++++++++++++++++++++++++++ + controller/binding.h | 5 ++ + controller/ovn-controller.c | 14 +++- + northd/ovn-northd.c | 6 +- + ovn-sb.ovsschema | 5 +- + ovn-sb.xml | 8 ++ + tests/ovn.at | 32 ++++++++ + utilities/ovn-sbctl.c | 4 + + 9 files changed, 254 insertions(+), 7 deletions(-) + +diff --git a/NEWS b/NEWS +index 7d2ba56..e89c5f4 100644 +--- a/NEWS ++++ b/NEWS +@@ -4,6 +4,12 @@ Post-v20.12.0 + - BFD protocol support according to RFC880 [0]. Introduce next-hop BFD + availability check for OVN static routes. + [0] https://tools.ietf.org/html/rfc5880) ++ - Change the semantic of the "Logical_Switch_Port.up" field such that it is ++ set to "true" only when all corresponding OVS openflow operations have ++ been processed. This also introduces a new "OVS.Interface.external-id", ++ "ovn-installed". This external-id is set by ovn-controller only after all ++ openflow operations corresponding to the OVS interface being added have ++ been processed. 
+ + OVN v20.12.0 - 18 Dec 2020 + -------------------------- +diff --git a/controller/binding.c b/controller/binding.c +index c8e8591..d44f635 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -18,6 +18,7 @@ + #include "ha-chassis.h" + #include "lflow.h" + #include "lport.h" ++#include "ofctrl-seqno.h" + #include "patch.h" + + #include "lib/bitmap.h" +@@ -34,6 +35,38 @@ + + VLOG_DEFINE_THIS_MODULE(binding); + ++/* External ID to be set in the OVS.Interface record when the OVS interface ++ * is ready for use, i.e., is bound to an OVN port and its corresponding ++ * flows have been installed. ++ */ ++#define OVN_INSTALLED_EXT_ID "ovn-installed" ++ ++/* Set of OVS interface IDs that have been released in the most recent ++ * processing iterations. This gets updated in release_lport() and is ++ * periodically emptied in binding_seqno_run(). ++ */ ++static struct sset binding_iface_released_set = ++ SSET_INITIALIZER(&binding_iface_released_set); ++ ++/* Set of OVS interface IDs that have been bound in the most recent ++ * processing iterations. This gets updated in claim_lport() and is ++ * periodically emptied in binding_seqno_run(). 
++ */ ++static struct sset binding_iface_bound_set = ++ SSET_INITIALIZER(&binding_iface_bound_set); ++ ++static void ++binding_iface_released_add(const char *iface_id) ++{ ++ sset_add(&binding_iface_released_set, iface_id); ++} ++ ++static void ++binding_iface_bound_add(const char *iface_id) ++{ ++ sset_add(&binding_iface_bound_set, iface_id); ++} ++ + #define OVN_QOS_TYPE "linux-htb" + + struct qos_queue { +@@ -845,6 +878,7 @@ claim_lport(const struct sbrec_port_binding *pb, + return false; + } + ++ binding_iface_bound_add(pb->logical_port); + if (pb->chassis) { + VLOG_INFO("Changing chassis for lport %s from %s to %s.", + pb->logical_port, pb->chassis->name, +@@ -908,7 +942,9 @@ release_lport(const struct sbrec_port_binding *pb, bool sb_readonly, + sbrec_port_binding_set_virtual_parent(pb, NULL); + } + ++ sbrec_port_binding_set_up(pb, NULL, 0); + update_lport_tracking(pb, tracked_datapaths); ++ binding_iface_released_add(pb->logical_port); + VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port); + return true; + } +@@ -2358,3 +2394,148 @@ delete_done: + destroy_qos_map(&qos_map); + return handled; + } ++ ++/* Registered ofctrl seqno type for port_binding flow installation. */ ++static size_t binding_seq_type_pb_cfg; ++ ++/* Binding specific seqno to be acked by ofctrl when flows for new interfaces ++ * have been installed. ++ */ ++static uint32_t binding_iface_seqno = 0; ++ ++/* Map indexed by iface-id containing the sequence numbers that when acked ++ * indicate that the OVS flows for the iface-id have been installed. ++ */ ++static struct simap binding_iface_seqno_map = ++ SIMAP_INITIALIZER(&binding_iface_seqno_map); ++ ++void ++binding_init(void) ++{ ++ binding_seq_type_pb_cfg = ofctrl_seqno_add_type(); ++} ++ ++/* Processes new release/bind operations on OVN ports. For newly bound ports ++ * it creates ofctrl seqno update requests that will be acked when ++ * corresponding OVS flows have been installed. 
++ * ++ * NOTE: Should be called only when valid SB and OVS transactions are ++ * available. ++ */ ++void ++binding_seqno_run(struct shash *local_bindings) ++{ ++ const char *iface_id; ++ const char *iface_id_next; ++ ++ SSET_FOR_EACH_SAFE (iface_id, iface_id_next, &binding_iface_released_set) { ++ struct shash_node *lb_node = shash_find(local_bindings, iface_id); ++ ++ /* If the local binding still exists (i.e., the OVS interface is ++ * still configured locally) then remove the external id and remove ++ * it from the in-flight seqno map. ++ */ ++ if (lb_node) { ++ struct local_binding *lb = lb_node->data; ++ ++ if (lb->iface && smap_get(&lb->iface->external_ids, ++ OVN_INSTALLED_EXT_ID)) { ++ ovsrec_interface_update_external_ids_delkey( ++ lb->iface, OVN_INSTALLED_EXT_ID); ++ } ++ } ++ simap_find_and_delete(&binding_iface_seqno_map, iface_id); ++ sset_delete(&binding_iface_released_set, ++ SSET_NODE_FROM_NAME(iface_id)); ++ } ++ ++ bool new_ifaces = false; ++ uint32_t new_seqno = binding_iface_seqno + 1; ++ ++ SSET_FOR_EACH_SAFE (iface_id, iface_id_next, &binding_iface_bound_set) { ++ struct shash_node *lb_node = shash_find(local_bindings, iface_id); ++ ++ if (lb_node) { ++ /* This is a newly bound interface, make sure we reset the ++ * Port_Binding 'up' field and the OVS Interface 'external-id'. ++ */ ++ struct local_binding *lb = lb_node->data; ++ ++ ovs_assert(lb->pb && lb->iface); ++ new_ifaces = true; ++ ++ if (smap_get(&lb->iface->external_ids, OVN_INSTALLED_EXT_ID)) { ++ ovsrec_interface_update_external_ids_delkey( ++ lb->iface, OVN_INSTALLED_EXT_ID); ++ } ++ sbrec_port_binding_set_up(lb->pb, NULL, 0); ++ simap_put(&binding_iface_seqno_map, lb->name, new_seqno); ++ } ++ sset_delete(&binding_iface_bound_set, SSET_NODE_FROM_NAME(iface_id)); ++ } ++ ++ /* Request a seqno update when the flows for new interfaces have been ++ * installed in OVS. 
++ */ ++ if (new_ifaces) { ++ binding_iface_seqno = new_seqno; ++ ofctrl_seqno_update_create(binding_seq_type_pb_cfg, new_seqno); ++ } ++} ++ ++/* Processes ofctrl seqno ACKs for new bindings. Sets the ++ * 'OVN_INSTALLED_EXT_ID' external-id in the OVS interface and the ++ * Port_Binding.up field for all ports for which OVS flows have been ++ * installed. ++ * ++ * NOTE: Should be called only when valid SB and OVS transactions are ++ * available. ++ */ ++void ++binding_seqno_install(struct shash *local_bindings) ++{ ++ struct ofctrl_acked_seqnos *acked_seqnos = ++ ofctrl_acked_seqnos_get(binding_seq_type_pb_cfg); ++ struct simap_node *node; ++ struct simap_node *node_next; ++ ++ SIMAP_FOR_EACH_SAFE (node, node_next, &binding_iface_seqno_map) { ++ struct shash_node *lb_node = shash_find(local_bindings, node->name); ++ bool up = true; ++ ++ if (!lb_node) { ++ goto del_seqno; ++ } ++ ++ struct local_binding *lb = lb_node->data; ++ if (!lb->pb || !lb->iface) { ++ goto del_seqno; ++ } ++ ++ if (!ofctrl_acked_seqnos_contains(acked_seqnos, node->data)) { ++ continue; ++ } ++ ++ ovsrec_interface_update_external_ids_setkey(lb->iface, ++ OVN_INSTALLED_EXT_ID, ++ "true"); ++ sbrec_port_binding_set_up(lb->pb, &up, 1); ++ ++ struct shash_node *child_node; ++ SHASH_FOR_EACH (child_node, &lb->children) { ++ struct local_binding *lb_child = child_node->data; ++ sbrec_port_binding_set_up(lb_child->pb, &up, 1); ++ } ++ ++del_seqno: ++ simap_delete(&binding_iface_seqno_map, node); ++ } ++ ++ ofctrl_acked_seqnos_destroy(acked_seqnos); ++} ++ ++void ++binding_seqno_flush(void) ++{ ++ simap_clear(&binding_iface_seqno_map); ++} +diff --git a/controller/binding.h b/controller/binding.h +index 2885971..c9ebef4 100644 +--- a/controller/binding.h ++++ b/controller/binding.h +@@ -135,4 +135,9 @@ bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, + bool binding_handle_port_binding_changes(struct binding_ctx_in *, + struct binding_ctx_out *); + void 
binding_tracked_dp_destroy(struct hmap *tracked_datapaths); ++ ++void binding_init(void); ++void binding_seqno_run(struct shash *local_bindings); ++void binding_seqno_install(struct shash *local_bindings); ++void binding_seqno_flush(void); + #endif /* controller/binding.h */ +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index bb1c659..5599ea4 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -981,6 +981,7 @@ en_ofctrl_is_connected_run(struct engine_node *node, void *data) + /* Flush ofctrl seqno requests when the ofctrl connection goes down. */ + if (!of_data->connected) { + ofctrl_seqno_flush(); ++ binding_seqno_flush(); + } + engine_set_node_state(node, EN_UPDATED); + return; +@@ -2404,13 +2405,14 @@ main(int argc, char *argv[]) + + daemonize_complete(); + ++ /* Register ofctrl seqno types. */ ++ ofctrl_seq_type_nb_cfg = ofctrl_seqno_add_type(); ++ ++ binding_init(); + patch_init(); + pinctrl_init(); + lflow_init(); + +- /* Register ofctrl seqno types. */ +- ofctrl_seq_type_nb_cfg = ofctrl_seqno_add_type(); +- + /* Connect to OVS OVSDB instance. 
*/ + struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( + ovsdb_idl_create(ovs_remote, &ovsrec_idl_class, false, true)); +@@ -2879,6 +2881,9 @@ main(int argc, char *argv[]) + ovnsb_idl_loop.idl), + ovnsb_cond_seqno, + ovnsb_expected_cond_seqno)); ++ if (runtime_data && ovs_idl_txn && ovnsb_idl_txn) { ++ binding_seqno_run(&runtime_data->local_bindings); ++ } + + flow_output_data = engine_get_data(&en_flow_output); + if (flow_output_data && ct_zones_data) { +@@ -2889,6 +2894,9 @@ main(int argc, char *argv[]) + engine_node_changed(&en_flow_output)); + } + ofctrl_seqno_run(ofctrl_get_cur_cfg()); ++ if (runtime_data && ovs_idl_txn && ovnsb_idl_txn) { ++ binding_seqno_install(&runtime_data->local_bindings); ++ } + } + + } +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 9f8fb3b..307ee9c 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -13049,7 +13049,7 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports, + continue; + } + +- bool up = (sb->chassis || lsp_is_router(op->nbsp)); ++ bool up = ((sb->up && (*sb->up)) || lsp_is_router(op->nbsp)); + if (!op->nbsp->up || *op->nbsp->up != up) { + nbrec_logical_switch_port_set_up(op->nbsp, &up, 1); + } +@@ -13197,7 +13197,7 @@ static const char *rbac_encap_update[] = + static const char *rbac_port_binding_auth[] = + {""}; + static const char *rbac_port_binding_update[] = +- {"chassis"}; ++ {"chassis", "up"}; + + static const char *rbac_mac_binding_auth[] = + {""}; +@@ -13684,6 +13684,8 @@ main(int argc, char *argv[]) + ovsdb_idl_add_column(ovnsb_idl_loop.idl, + &sbrec_port_binding_col_virtual_parent); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_port_binding_col_up); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, + &sbrec_gateway_chassis_col_chassis); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, +diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema +index 97db6de..b418434 100644 +--- 
a/ovn-sb.ovsschema ++++ b/ovn-sb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Southbound", +- "version": "20.13.0", +- "cksum": "3035725595 25676", ++ "version": "20.14.0", ++ "cksum": "1412040198 25748", + "tables": { + "SB_Global": { + "columns": { +@@ -225,6 +225,7 @@ + "nat_addresses": {"type": {"key": "string", + "min": 0, + "max": "unlimited"}}, ++ "up": {"type": {"key": "boolean", "min": 0, "max": 1}}, + "external_ids": {"type": {"key": "string", + "value": "string", + "min": 0, +diff --git a/ovn-sb.xml b/ovn-sb.xml +index eb440e4..4c82d51 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -2771,6 +2771,14 @@ tcp.flags = RST; +

    + + ++ ++

    ++ This is set to true whenever all OVS flows ++ required by this Port_Binding have been installed. This is ++ populated by ovn-controller. ++

    ++
    ++ + +

    + A number that represents the logical port in the key (e.g. STT key or +diff --git a/tests/ovn.at b/tests/ovn.at +index a4fafa5..dfb94d2 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -23662,3 +23662,35 @@ as ovn-nb + OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + + AT_CLEANUP ++ ++AT_SETUP([ovn -- propagate Port_Binding.up to NB and OVS]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++check ovn-nbctl ls-add ls ++ ++AS_BOX([add OVS port for existing LSP]) ++check ovn-nbctl lsp-add ls lsp1 ++check ovn-nbctl --wait=hv sync ++check_column "[]" Port_Binding up logical_port=lsp1 ++ ++check ovs-vsctl add-port br-int lsp1 -- set Interface lsp1 external-ids:iface-id=lsp1 ++check_column "true" Port_Binding up logical_port=lsp1 ++wait_column "true" nb:Logical_Switch_Port up name=lsp1 ++OVS_WAIT_UNTIL([test `ovs-vsctl get Interface lsp1 external_ids:ovn-installed` = '"true"']) ++ ++AS_BOX([add LSP for existing OVS port]) ++check ovs-vsctl add-port br-int lsp2 -- set Interface lsp2 external-ids:iface-id=lsp2 ++check ovn-nbctl lsp-add ls lsp2 ++check ovn-nbctl --wait=hv sync ++check_column "true" Port_Binding up logical_port=lsp2 ++wait_column "true" nb:Logical_Switch_Port up name=lsp2 ++OVS_WAIT_UNTIL([test `ovs-vsctl get Interface lsp2 external_ids:ovn-installed` = '"true"']) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP +diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c +index 0a1b9ff..c38e8ec 100644 +--- a/utilities/ovn-sbctl.c ++++ b/utilities/ovn-sbctl.c +@@ -526,6 +526,7 @@ pre_get_info(struct ctl_context *ctx) + ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_tunnel_key); + ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_chassis); + ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_datapath); ++ ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_up); + + ovsdb_idl_add_column(ctx->idl, &sbrec_logical_flow_col_logical_datapath); + ovsdb_idl_add_column(ctx->idl, 
&sbrec_logical_flow_col_logical_dp_group); +@@ -665,6 +666,7 @@ cmd_lsp_bind(struct ctl_context *ctx) + struct sbctl_chassis *sbctl_ch; + struct sbctl_port_binding *sbctl_bd; + char *lport_name, *ch_name; ++ bool up = true; + + /* port_binding must exist, chassis must exist! */ + lport_name = ctx->argv[1]; +@@ -683,6 +685,7 @@ cmd_lsp_bind(struct ctl_context *ctx) + } + } + sbrec_port_binding_set_chassis(sbctl_bd->bd_cfg, sbctl_ch->ch_cfg); ++ sbrec_port_binding_set_up(sbctl_bd->bd_cfg, &up, 1); + sbctl_context_invalidate_cache(ctx); + } + +@@ -699,6 +702,7 @@ cmd_lsp_unbind(struct ctl_context *ctx) + sbctl_bd = find_port_binding(sbctl_ctx, lport_name, must_exist); + if (sbctl_bd) { + sbrec_port_binding_set_chassis(sbctl_bd->bd_cfg, NULL); ++ sbrec_port_binding_set_up(sbctl_bd->bd_cfg, NULL, 0); + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0003-northd-Allow-backwards-compatibility-for-Logical_Swi.patch b/SOURCES/0003-northd-Allow-backwards-compatibility-for-Logical_Swi.patch new file mode 100644 index 0000000..93143b4 --- /dev/null +++ b/SOURCES/0003-northd-Allow-backwards-compatibility-for-Logical_Swi.patch @@ -0,0 +1,222 @@ +From 07b0f0468faeeb1e149dcc3e4926a54cbb9bb367 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 3 Feb 2021 20:36:52 +0100 +Subject: [PATCH 3/4] northd: Allow backwards compatibility for + Logical_Switch_Port.up. + +In general, ovn-northd expects ovn-controller to set Port_Binding.up +before it declares the logical switch port as being up. + +Even though the recommended upgrade procedure for OVN states that +ovn-controllers should be upgraded before ovn-northd, there are cases +when CMSs don't follow this guideline. + +This would cause all existing and bound Logical_Switch_Ports to be +declared "down" until ovn-controllers are upgraded. + +To avoid this situation, ovn-controllers now explicitly set +Chassis.other_config:port-up-notif in their own chassis record. 
Based +on this value, ovn-northd can determine if it needs to use the old type +of logic or the new one (Port_Binding.up) when setting LSP.up. + +Note: +In case of downgrading ovn-controller before ovn-northd, if +ovn-controller is forcefully stopped it will not clear its chassis +record from the SB. Older versions will not have the capability to +clear the other_config:port-up-notif value so LSPs will be declared +"down" until ovn-northd is downgraded as well. As this +upgrade/downgrade procedure is not the recommended one, we don't try +to deal with this scenario. + +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit a99af0367acc744321747bad33bf598d06a612de) + +Change-Id: Iaec681d05abec490b7e7cb330f1ca8f00149cefb +--- + controller/chassis.c | 7 +++++++ + include/ovn/automake.mk | 1 + + include/ovn/features.h | 22 ++++++++++++++++++++++ + northd/ovn-northd.c | 13 ++++++++++++- + ovn-sb.xml | 5 +++++ + tests/ovn-controller.at | 17 +++++++++++++++++ + tests/ovn-northd.at | 22 ++++++++++++++++++++++ + 7 files changed, 86 insertions(+), 1 deletion(-) + create mode 100644 include/ovn/features.h + +diff --git a/controller/chassis.c b/controller/chassis.c +index b4d4b0e..0937e33 100644 +--- a/controller/chassis.c ++++ b/controller/chassis.c +@@ -28,6 +28,7 @@ + #include "lib/ovn-sb-idl.h" + #include "ovn-controller.h" + #include "lib/util.h" ++#include "ovn/features.h" + + VLOG_DEFINE_THIS_MODULE(chassis); + +@@ -293,6 +294,7 @@ chassis_build_other_config(struct smap *config, const char *bridge_mappings, + smap_replace(config, "iface-types", iface_types); + smap_replace(config, "ovn-chassis-mac-mappings", chassis_macs); + smap_replace(config, "is-interconn", is_interconn ? 
"true" : "false"); ++ smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true"); + } + + /* +@@ -363,6 +365,11 @@ chassis_other_config_changed(const char *bridge_mappings, + return true; + } + ++ if (!smap_get_bool(&chassis_rec->other_config, OVN_FEATURE_PORT_UP_NOTIF, ++ false)) { ++ return true; ++ } ++ + return false; + } + +diff --git a/include/ovn/automake.mk b/include/ovn/automake.mk +index 54b0e2c..582241a 100644 +--- a/include/ovn/automake.mk ++++ b/include/ovn/automake.mk +@@ -2,5 +2,6 @@ ovnincludedir = $(includedir)/ovn + ovninclude_HEADERS = \ + include/ovn/actions.h \ + include/ovn/expr.h \ ++ include/ovn/features.h \ + include/ovn/lex.h \ + include/ovn/logical-fields.h +diff --git a/include/ovn/features.h b/include/ovn/features.h +new file mode 100644 +index 0000000..10ee46f +--- /dev/null ++++ b/include/ovn/features.h +@@ -0,0 +1,22 @@ ++/* Copyright (c) 2021, Red Hat, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++#ifndef OVN_FEATURES_H ++#define OVN_FEATURES_H 1 ++ ++/* ovn-controller supported feature names. 
*/ ++#define OVN_FEATURE_PORT_UP_NOTIF "port-up-notif" ++ ++#endif +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 0dc920b..62d45f9 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -38,6 +38,7 @@ + #include "lib/ovn-util.h" + #include "lib/lb.h" + #include "ovn/actions.h" ++#include "ovn/features.h" + #include "ovn/logical-fields.h" + #include "packets.h" + #include "openvswitch/poll-loop.h" +@@ -13057,7 +13058,17 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports, + continue; + } + +- bool up = ((sb->up && (*sb->up)) || lsp_is_router(op->nbsp)); ++ bool up = false; ++ ++ if (lsp_is_router(op->nbsp)) { ++ up = true; ++ } else if (sb->chassis) { ++ up = smap_get_bool(&sb->chassis->other_config, ++ OVN_FEATURE_PORT_UP_NOTIF, false) ++ ? sb->n_up && sb->up[0] ++ : true; ++ } ++ + if (!op->nbsp->up || *op->nbsp->up != up) { + nbrec_logical_switch_port_set_up(op->nbsp, &up, 1); + } +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 4c82d51..980a096 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -322,6 +322,11 @@ + table. See ovn-controller(8) for more information. + + ++ ++ ovn-controller populates this key with true ++ when it supports Port_Binding.up. ++ ++ + + The overall purpose of these columns is described under Common + Columns at the beginning of this document. +diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at +index 1b46799..f818f9c 100644 +--- a/tests/ovn-controller.at ++++ b/tests/ovn-controller.at +@@ -414,3 +414,20 @@ OVS_WAIT_UNTIL([ovs-vsctl get Bridge br-int external_ids:ovn-nb-cfg], [0], [1]) + + OVN_CLEANUP([hv1]) + AT_CLEANUP ++ ++AT_SETUP([ovn -- features]) ++AT_KEYWORDS([features]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++# Wait for ovn-controller to register in the SB. 
++OVS_WAIT_UNTIL([ ++ test "$(ovn-sbctl get chassis hv1 other_config:port-up-notif)" = '"true"' ++]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index c00225e..d52aeed 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2452,3 +2452,25 @@ check ovn-nbctl --wait=sb sync + AT_CHECK([grep -qE 'duplicate logical.*port p1' northd/ovn-northd.log], [0]) + + AT_CLEANUP ++ ++AT_SETUP([ovn -- Port_Binding.up backwards compatibility]) ++ovn_start ++ ++ovn-nbctl ls-add ls1 ++ovn-nbctl --wait=sb lsp-add ls1 lsp1 ++ ++# Simulate the fact that lsp1 had been previously bound on hv1 by an ++# ovn-controller running an older version. ++ovn-sbctl \ ++ --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="geneve" \ ++ -- --id=@c create chassis name=hv1 encaps=@e \ ++ -- set Port_Binding lsp1 chassis=@c ++ ++wait_for_ports_up lsp1 ++ ++# Simulate the fact that hv1 is aware of Port_Binding.up, ovn-northd ++# should transition the port state to down. ++check ovn-sbctl set chassis hv1 other_config:port-up-notif=true ++wait_row_count nb:Logical_Switch_Port 1 up=false name=lsp1 ++ ++AT_CLEANUP +-- +1.8.3.1 + diff --git a/SOURCES/0003-ovn-northd-Move-lswitch-DNS-lookup-and-response-to-a.patch b/SOURCES/0003-ovn-northd-Move-lswitch-DNS-lookup-and-response-to-a.patch new file mode 100644 index 0000000..80b31c3 --- /dev/null +++ b/SOURCES/0003-ovn-northd-Move-lswitch-DNS-lookup-and-response-to-a.patch @@ -0,0 +1,94 @@ +From 685d26ba45965b2268fbbc36d167115419321f25 Mon Sep 17 00:00:00 2001 +Message-Id: <685d26ba45965b2268fbbc36d167115419321f25.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:30 +0000 +Subject: [PATCH 03/16] ovn-northd: Move lswitch DNS lookup and response to a + function. 
+ +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 50 ++++++++++++++++++++++++--------------------- + 1 file changed, 27 insertions(+), 23 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index a5b28584f..be98a6013 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6780,29 +6780,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + +- /* Logical switch ingress table 17 and 18: DNS lookup and response +- * priority 100 flows. +- */ +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbs || !ls_has_dns_records(od->nbs)) { +- continue; +- } +- +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100, +- "udp.dst == 53", +- REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;"); +- const char *dns_action = "eth.dst <-> eth.src; ip4.src <-> ip4.dst; " +- "udp.dst = udp.src; udp.src = 53; outport = inport; " +- "flags.loopback = 1; output;"; +- const char *dns_match = "udp.dst == 53 && "REGBIT_DNS_LOOKUP_RESULT; +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, +- dns_match, dns_action); +- dns_action = "eth.dst <-> eth.src; ip6.src <-> ip6.dst; " +- "udp.dst = udp.src; udp.src = 53; outport = inport; " +- "flags.loopback = 1; output;"; +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, +- dns_match, dns_action); +- } + + /* Ingress table 14 and 15: DHCP options and response, by default goto + * next. (priority 0). +@@ -7484,6 +7461,32 @@ build_lswitch_dhcp_options_and_response(struct ovn_port *op, + } + } + ++/* Logical switch ingress table 17 and 18: DNS lookup and response ++* priority 100 flows. 
++*/ ++static void ++build_lswitch_dns_lookup_and_response(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbs && ls_has_dns_records(od->nbs)) { ++ ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100, ++ "udp.dst == 53", ++ REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;"); ++ const char *dns_action = "eth.dst <-> eth.src; ip4.src <-> ip4.dst; " ++ "udp.dst = udp.src; udp.src = 53; outport = inport; " ++ "flags.loopback = 1; output;"; ++ const char *dns_match = "udp.dst == 53 && "REGBIT_DNS_LOOKUP_RESULT; ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, ++ dns_match, dns_action); ++ dns_action = "eth.dst <-> eth.src; ip6.src <-> ip6.dst; " ++ "udp.dst = udp.src; udp.src = 53; outport = inport; " ++ "flags.loopback = 1; output;"; ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, ++ dns_match, dns_action); ++ } ++} ++ + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. + * +@@ -11335,6 +11338,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_lswitch_lflows_admission_control(od, lsi->lflows); + build_lswitch_input_port_sec_od(od, lsi->lflows); + build_lswitch_arp_nd_responder_default(od, lsi->lflows); ++ build_lswitch_dns_lookup_and_response(od, lsi->lflows); + + /* Build Logical Router Flows. 
*/ + build_adm_ctrl_flows_for_lrouter(od, lsi->lflows); +-- +2.29.2 + diff --git a/SOURCES/0004-ovn-northd-Move-DNS-and-DHCP-defaults-to-a-function.patch b/SOURCES/0004-ovn-northd-Move-DNS-and-DHCP-defaults-to-a-function.patch new file mode 100644 index 0000000..0668159 --- /dev/null +++ b/SOURCES/0004-ovn-northd-Move-DNS-and-DHCP-defaults-to-a-function.patch @@ -0,0 +1,83 @@ +From 502d52712bca01f237aa15e5853bc3090e6034e5 Mon Sep 17 00:00:00 2001 +Message-Id: <502d52712bca01f237aa15e5853bc3090e6034e5.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:31 +0000 +Subject: [PATCH 04/16] ovn-northd: Move DNS and DHCP defaults to a function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 40 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index be98a6013..45d6a6a2e 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6780,26 +6780,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + +- +- /* Ingress table 14 and 15: DHCP options and response, by default goto +- * next. (priority 0). +- * Ingress table 16 and 17: DNS lookup and response, by default goto next. +- * (priority 0). +- * Ingress table 18 - External port handling, by default goto next. +- * (priority 0). 
*/ +- +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbs) { +- continue; +- } +- +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_IN_EXTERNAL_PORT, 0, "1", "next;"); +- } +- + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbsp || !lsp_is_external(op->nbsp)) { + continue; +@@ -7461,6 +7441,25 @@ build_lswitch_dhcp_options_and_response(struct ovn_port *op, + } + } + ++/* Ingress table 14 and 15: DHCP options and response, by default goto ++ * next. (priority 0). ++ * Ingress table 16 and 17: DNS lookup and response, by default goto next. ++ * (priority 0). ++ * Ingress table 18 - External port handling, by default goto next. ++ * (priority 0). */ ++static void ++build_lswitch_dhcp_and_dns_defaults(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbs) { ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_EXTERNAL_PORT, 0, "1", "next;"); ++ } ++} ++ + /* Logical switch ingress table 17 and 18: DNS lookup and response + * priority 100 flows. + */ +@@ -11339,6 +11338,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_lswitch_input_port_sec_od(od, lsi->lflows); + build_lswitch_arp_nd_responder_default(od, lsi->lflows); + build_lswitch_dns_lookup_and_response(od, lsi->lflows); ++ build_lswitch_dhcp_and_dns_defaults(od, lsi->lflows); + + /* Build Logical Router Flows. 
*/ + build_adm_ctrl_flows_for_lrouter(od, lsi->lflows); +-- +2.29.2 + diff --git a/SOURCES/0004-tests-Fix-Port_Binding-up-test.patch b/SOURCES/0004-tests-Fix-Port_Binding-up-test.patch new file mode 100644 index 0000000..7741842 --- /dev/null +++ b/SOURCES/0004-tests-Fix-Port_Binding-up-test.patch @@ -0,0 +1,59 @@ +From 4e143c1e58b18adf6914ec783ee4503a63dbf3a8 Mon Sep 17 00:00:00 2001 +From: Gongming Chen +Date: Sun, 7 Feb 2021 02:52:53 +0000 +Subject: [PATCH 4/4] tests: Fix Port_Binding up test. + +After setting the iface-id, immediately check the up status of the port +binding, it will occasionally fail, especially when the port binding +status is reported later. + +When it fails, the following will be output: +Checking values in sb Port_Binding with logical_port=lsp1 against false... found false +ovs-vsctl add-port br-int lsp1 -- set Interface lsp1 external-ids:iface-id=lsp1 +./ovn-macros.at:307: "$@" +Checking values in sb Port_Binding with logical_port=lsp1 against true... found false +_uuid : 15ebabb6-3dbb-4806-aa85-d1c03e3b39f6 +logical_port : lsp1 +up : true +./ovn-macros.at:393: hard failure + +Fixes: 4d3cb42b076b ("binding: Set Logical_Switch_Port.up when all OVS flows are installed.") +Signed-off-by: Gongming Chen +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit 44ea2ec88136f83e7eab9790473025b6c95bdcc0) + +Change-Id: I53d1834cc6b59cc42b494661378d00bf722dc88a +--- + AUTHORS.rst | 1 + + tests/ovn.at | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/AUTHORS.rst b/AUTHORS.rst +index 5d926c1..29c2c01 100644 +--- a/AUTHORS.rst ++++ b/AUTHORS.rst +@@ -155,6 +155,7 @@ Geoffrey Wossum gwossum@acm.org + Gianluca Merlo gianluca.merlo@gmail.com + Giuseppe Lettieri g.lettieri@iet.unipi.it + Glen Gibb grg@stanford.edu ++Gongming Chen gmingchen@tencent.com + Guoshuai Li ligs@dtdream.com + Guolin Yang gyang@vmware.com + Guru Chaitanya Perakam gperakam@Brocade.com +diff --git a/tests/ovn.at b/tests/ovn.at 
+index 2ef056b..9f2e152 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -23700,7 +23700,7 @@ check ovn-nbctl --wait=hv sync + check_column "false" Port_Binding up logical_port=lsp1 + + check ovs-vsctl add-port br-int lsp1 -- set Interface lsp1 external-ids:iface-id=lsp1 +-check_column "true" Port_Binding up logical_port=lsp1 ++wait_column "true" Port_Binding up logical_port=lsp1 + wait_column "true" nb:Logical_Switch_Port up name=lsp1 + OVS_WAIT_UNTIL([test `ovs-vsctl get Interface lsp1 external_ids:ovn-installed` = '"true"']) + +-- +1.8.3.1 + diff --git a/SOURCES/0005-ovn-northd-Move-ARP-response-for-external-ports-to-a.patch b/SOURCES/0005-ovn-northd-Move-ARP-response-for-external-ports-to-a.patch new file mode 100644 index 0000000..85c1abb --- /dev/null +++ b/SOURCES/0005-ovn-northd-Move-ARP-response-for-external-ports-to-a.patch @@ -0,0 +1,77 @@ +From d63444b7fcdcc2c68b7af94090410bc3e40e574b Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:32 +0000 +Subject: [PATCH 05/16] ovn-northd: Move ARP response for external ports to a + function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 33 ++++++++++++++++++--------------- + 1 file changed, 18 insertions(+), 15 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 45d6a6a2e..09bfaae5e 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6780,21 +6780,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp || !lsp_is_external(op->nbsp)) { +- continue; +- } +- +- /* Table 18: External port. Drop ARP request for router ips from +- * external ports on chassis not binding those ports. +- * This makes the router pipeline to be run only on the chassis +- * binding the external ports. 
*/ +- for (size_t i = 0; i < op->od->n_localnet_ports; i++) { +- build_drop_arp_nd_flows_for_unbound_router_ports( +- op, op->od->localnet_ports[i], lflows); +- } +- } +- + /* Ingress table 19: Destination lookup, broadcast and multicast handling + * (priority 70 - 100). */ + HMAP_FOR_EACH (od, key_node, datapaths) { +@@ -7486,6 +7471,23 @@ build_lswitch_dns_lookup_and_response(struct ovn_datapath *od, + } + } + ++/* Table 18: External port. Drop ARP request for router ips from ++ * external ports on chassis not binding those ports. ++ * This makes the router pipeline to be run only on the chassis ++ * binding the external ports. */ ++static void ++build_lswitch_external_port(struct ovn_port *op, ++ struct hmap *lflows) ++{ ++ if (op->nbsp && lsp_is_external(op->nbsp)) { ++ ++ for (size_t i = 0; i < op->od->n_localnet_ports; i++) { ++ build_drop_arp_nd_flows_for_unbound_router_ports( ++ op, op->od->localnet_ports[i], lflows); ++ } ++ } ++} ++ + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. + * +@@ -11376,6 +11378,7 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, + &lsi->actions, + &lsi->match); + build_lswitch_dhcp_options_and_response(op,lsi->lflows); ++ build_lswitch_external_port(op, lsi->lflows); + + /* Build Logical Router Flows. 
*/ + build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, +-- +2.29.2 + diff --git a/SOURCES/0006-ovn-northd-Move-broadcast-and-multicast-lookup-in-ls.patch b/SOURCES/0006-ovn-northd-Move-broadcast-and-multicast-lookup-in-ls.patch new file mode 100644 index 0000000..d734a8c --- /dev/null +++ b/SOURCES/0006-ovn-northd-Move-broadcast-and-multicast-lookup-in-ls.patch @@ -0,0 +1,213 @@ +From 9e60b5574786c0ef8f6403ac61567553c1a7717f Mon Sep 17 00:00:00 2001 +Message-Id: <9e60b5574786c0ef8f6403ac61567553c1a7717f.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:33 +0000 +Subject: [PATCH 06/16] ovn-northd: Move broadcast and multicast lookup in + lswitch to a function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 169 +++++++++++++++++++++++--------------------- + 1 file changed, 87 insertions(+), 82 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 09bfaae5e..f4e248f55 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6780,88 +6780,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + +- /* Ingress table 19: Destination lookup, broadcast and multicast handling +- * (priority 70 - 100). */ +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbs) { +- continue; +- } +- +- ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 110, +- "eth.dst == $svc_monitor_mac", +- "handle_svc_check(inport);"); +- +- struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; +- +- if (mcast_sw_info->enabled) { +- ds_clear(&actions); +- if (mcast_sw_info->flood_reports) { +- ds_put_cstr(&actions, +- "clone { " +- "outport = \""MC_MROUTER_STATIC"\"; " +- "output; " +- "};"); +- } +- ds_put_cstr(&actions, "igmp;"); +- /* Punt IGMP traffic to controller. 
*/ +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 100, +- "ip4 && ip.proto == 2", ds_cstr(&actions)); +- +- /* Punt MLD traffic to controller. */ +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 100, +- "mldv1 || mldv2", ds_cstr(&actions)); +- +- /* Flood all IP multicast traffic destined to 224.0.0.X to all +- * ports - RFC 4541, section 2.1.2, item 2. +- */ +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 85, +- "ip4.mcast && ip4.dst == 224.0.0.0/24", +- "outport = \""MC_FLOOD"\"; output;"); +- +- /* Flood all IPv6 multicast traffic destined to reserved +- * multicast IPs (RFC 4291, 2.7.1). +- */ +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 85, +- "ip6.mcast_flood", +- "outport = \""MC_FLOOD"\"; output;"); +- +- /* Forward uregistered IP multicast to routers with relay enabled +- * and to any ports configured to flood IP multicast traffic. +- * If configured to flood unregistered traffic this will be +- * handled by the L2 multicast flow. +- */ +- if (!mcast_sw_info->flood_unregistered) { +- ds_clear(&actions); +- +- if (mcast_sw_info->flood_relay) { +- ds_put_cstr(&actions, +- "clone { " +- "outport = \""MC_MROUTER_FLOOD"\"; " +- "output; " +- "}; "); +- } +- +- if (mcast_sw_info->flood_static) { +- ds_put_cstr(&actions, "outport =\""MC_STATIC"\"; output;"); +- } +- +- /* Explicitly drop the traffic if relay or static flooding +- * is not configured. +- */ +- if (!mcast_sw_info->flood_relay && +- !mcast_sw_info->flood_static) { +- ds_put_cstr(&actions, "drop;"); +- } +- +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 80, +- "ip4.mcast || ip6.mcast", +- ds_cstr(&actions)); +- } +- } +- +- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 70, "eth.mcast", +- "outport = \""MC_FLOOD"\"; output;"); +- } + + /* Ingress table 19: Add IP multicast flows learnt from IGMP/MLD + * (priority 90). 
*/ +@@ -7488,6 +7406,92 @@ build_lswitch_external_port(struct ovn_port *op, + } + } + ++/* Ingress table 19: Destination lookup, broadcast and multicast handling ++ * (priority 70 - 100). */ ++static void ++build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od, ++ struct hmap *lflows, ++ struct ds *actions) ++{ ++ if (od->nbs) { ++ ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 110, ++ "eth.dst == $svc_monitor_mac", ++ "handle_svc_check(inport);"); ++ ++ struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw; ++ ++ if (mcast_sw_info->enabled) { ++ ds_clear(actions); ++ if (mcast_sw_info->flood_reports) { ++ ds_put_cstr(actions, ++ "clone { " ++ "outport = \""MC_MROUTER_STATIC"\"; " ++ "output; " ++ "};"); ++ } ++ ds_put_cstr(actions, "igmp;"); ++ /* Punt IGMP traffic to controller. */ ++ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 100, ++ "ip4 && ip.proto == 2", ds_cstr(actions)); ++ ++ /* Punt MLD traffic to controller. */ ++ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 100, ++ "mldv1 || mldv2", ds_cstr(actions)); ++ ++ /* Flood all IP multicast traffic destined to 224.0.0.X to all ++ * ports - RFC 4541, section 2.1.2, item 2. ++ */ ++ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 85, ++ "ip4.mcast && ip4.dst == 224.0.0.0/24", ++ "outport = \""MC_FLOOD"\"; output;"); ++ ++ /* Flood all IPv6 multicast traffic destined to reserved ++ * multicast IPs (RFC 4291, 2.7.1). ++ */ ++ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 85, ++ "ip6.mcast_flood", ++ "outport = \""MC_FLOOD"\"; output;"); ++ ++ /* Forward uregistered IP multicast to routers with relay enabled ++ * and to any ports configured to flood IP multicast traffic. ++ * If configured to flood unregistered traffic this will be ++ * handled by the L2 multicast flow. 
++ */ ++ if (!mcast_sw_info->flood_unregistered) { ++ ds_clear(actions); ++ ++ if (mcast_sw_info->flood_relay) { ++ ds_put_cstr(actions, ++ "clone { " ++ "outport = \""MC_MROUTER_FLOOD"\"; " ++ "output; " ++ "}; "); ++ } ++ ++ if (mcast_sw_info->flood_static) { ++ ds_put_cstr(actions, "outport =\""MC_STATIC"\"; output;"); ++ } ++ ++ /* Explicitly drop the traffic if relay or static flooding ++ * is not configured. ++ */ ++ if (!mcast_sw_info->flood_relay && ++ !mcast_sw_info->flood_static) { ++ ds_put_cstr(actions, "drop;"); ++ } ++ ++ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 80, ++ "ip4.mcast || ip6.mcast", ++ ds_cstr(actions)); ++ } ++ } ++ ++ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 70, "eth.mcast", ++ "outport = \""MC_FLOOD"\"; output;"); ++ } ++} ++ + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. + * +@@ -11341,6 +11345,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_lswitch_arp_nd_responder_default(od, lsi->lflows); + build_lswitch_dns_lookup_and_response(od, lsi->lflows); + build_lswitch_dhcp_and_dns_defaults(od, lsi->lflows); ++ build_lswitch_destination_lookup_bmcast(od, lsi->lflows, &lsi->actions); + + /* Build Logical Router Flows. 
*/ + build_adm_ctrl_flows_for_lrouter(od, lsi->lflows); +-- +2.29.2 + diff --git a/SOURCES/0007-ovn-northd-Move-destination-handling-into-functions.patch b/SOURCES/0007-ovn-northd-Move-destination-handling-into-functions.patch new file mode 100644 index 0000000..68fd93e --- /dev/null +++ b/SOURCES/0007-ovn-northd-Move-destination-handling-into-functions.patch @@ -0,0 +1,506 @@ +From 137b049777cfc301eadba8a2c3b55764bde6f451 Mon Sep 17 00:00:00 2001 +Message-Id: <137b049777cfc301eadba8a2c3b55764bde6f451.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:34 +0000 +Subject: [PATCH 07/16] ovn-northd: Move destination handling into functions. + +1. Move igmp/mld destination handling into a function. +2. Move unicast destination handling into a function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 433 +++++++++++++++++++++++--------------------- + 1 file changed, 223 insertions(+), 210 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index f4e248f55..27a788095 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -6769,8 +6769,7 @@ is_vlan_transparent(const struct ovn_datapath *od) + + static void + build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +- struct hmap *lflows, struct hmap *mcgroups, +- struct hmap *igmp_groups) ++ struct hmap *lflows) + { + /* This flow table structure is documented in ovn-northd(8), so please + * update ovn-northd.8.xml if you change anything. */ +@@ -6778,212 +6777,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct ds match = DS_EMPTY_INITIALIZER; + struct ds actions = DS_EMPTY_INITIALIZER; + struct ovn_datapath *od; +- struct ovn_port *op; +- +- +- /* Ingress table 19: Add IP multicast flows learnt from IGMP/MLD +- * (priority 90). 
*/ +- struct ovn_igmp_group *igmp_group; +- +- HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) { +- if (!igmp_group->datapath) { +- continue; +- } +- +- ds_clear(&match); +- ds_clear(&actions); +- +- struct mcast_switch_info *mcast_sw_info = +- &igmp_group->datapath->mcast_info.sw; +- +- if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { +- /* RFC 4541, section 2.1.2, item 2: Skip groups in the 224.0.0.X +- * range. +- */ +- ovs_be32 group_address = +- in6_addr_get_mapped_ipv4(&igmp_group->address); +- if (ip_is_local_multicast(group_address)) { +- continue; +- } +- +- if (mcast_sw_info->active_v4_flows >= mcast_sw_info->table_size) { +- continue; +- } +- mcast_sw_info->active_v4_flows++; +- ds_put_format(&match, "eth.mcast && ip4 && ip4.dst == %s ", +- igmp_group->mcgroup.name); +- } else { +- /* RFC 4291, section 2.7.1: Skip groups that correspond to all +- * hosts. +- */ +- if (ipv6_is_all_hosts(&igmp_group->address)) { +- continue; +- } +- if (mcast_sw_info->active_v6_flows >= mcast_sw_info->table_size) { +- continue; +- } +- mcast_sw_info->active_v6_flows++; +- ds_put_format(&match, "eth.mcast && ip6 && ip6.dst == %s ", +- igmp_group->mcgroup.name); +- } +- +- /* Also flood traffic to all multicast routers with relay enabled. 
*/ +- if (mcast_sw_info->flood_relay) { +- ds_put_cstr(&actions, +- "clone { " +- "outport = \""MC_MROUTER_FLOOD "\"; " +- "output; " +- "};"); +- } +- if (mcast_sw_info->flood_static) { +- ds_put_cstr(&actions, +- "clone { " +- "outport =\""MC_STATIC"\"; " +- "output; " +- "};"); +- } +- ds_put_format(&actions, "outport = \"%s\"; output; ", +- igmp_group->mcgroup.name); +- +- ovn_lflow_add_unique(lflows, igmp_group->datapath, S_SWITCH_IN_L2_LKUP, +- 90, ds_cstr(&match), ds_cstr(&actions)); +- } +- +- /* Ingress table 19: Destination lookup, unicast handling (priority 50), */ +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp || lsp_is_external(op->nbsp)) { +- continue; +- } +- +- /* For ports connected to logical routers add flows to bypass the +- * broadcast flooding of ARP/ND requests in table 19. We direct the +- * requests only to the router port that owns the IP address. +- */ +- if (lsp_is_router(op->nbsp)) { +- build_lswitch_rport_arp_req_flows(op->peer, op->od, op, lflows, +- &op->nbsp->header_); +- } +- +- for (size_t i = 0; i < op->nbsp->n_addresses; i++) { +- /* Addresses are owned by the logical port. +- * Ethernet address followed by zero or more IPv4 +- * or IPv6 addresses (or both). 
*/ +- struct eth_addr mac; +- if (ovs_scan(op->nbsp->addresses[i], +- ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { +- ds_clear(&match); +- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, +- ETH_ADDR_ARGS(mac)); +- +- ds_clear(&actions); +- ds_put_format(&actions, "outport = %s; output;", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_L2_LKUP, +- 50, ds_cstr(&match), +- ds_cstr(&actions), +- &op->nbsp->header_); +- } else if (!strcmp(op->nbsp->addresses[i], "unknown")) { +- if (lsp_is_enabled(op->nbsp)) { +- ovn_multicast_add(mcgroups, &mc_unknown, op); +- op->od->has_unknown = true; +- } +- } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) { +- if (!op->nbsp->dynamic_addresses +- || !ovs_scan(op->nbsp->dynamic_addresses, +- ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { +- continue; +- } +- ds_clear(&match); +- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, +- ETH_ADDR_ARGS(mac)); +- +- ds_clear(&actions); +- ds_put_format(&actions, "outport = %s; output;", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_L2_LKUP, +- 50, ds_cstr(&match), +- ds_cstr(&actions), +- &op->nbsp->header_); +- } else if (!strcmp(op->nbsp->addresses[i], "router")) { +- if (!op->peer || !op->peer->nbrp +- || !ovs_scan(op->peer->nbrp->mac, +- ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { +- continue; +- } +- ds_clear(&match); +- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, +- ETH_ADDR_ARGS(mac)); +- if (op->peer->od->l3dgw_port +- && op->peer->od->l3redirect_port +- && op->od->n_localnet_ports) { +- bool add_chassis_resident_check = false; +- if (op->peer == op->peer->od->l3dgw_port) { +- /* The peer of this port represents a distributed +- * gateway port. The destination lookup flow for the +- * router's distributed gateway port MAC address should +- * only be programmed on the gateway chassis. 
*/ +- add_chassis_resident_check = true; +- } else { +- /* Check if the option 'reside-on-redirect-chassis' +- * is set to true on the peer port. If set to true +- * and if the logical switch has a localnet port, it +- * means the router pipeline for the packets from +- * this logical switch should be run on the chassis +- * hosting the gateway port. +- */ +- add_chassis_resident_check = smap_get_bool( +- &op->peer->nbrp->options, +- "reside-on-redirect-chassis", false); +- } +- +- if (add_chassis_resident_check) { +- ds_put_format(&match, " && is_chassis_resident(%s)", +- op->peer->od->l3redirect_port->json_key); +- } +- } +- +- ds_clear(&actions); +- ds_put_format(&actions, "outport = %s; output;", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_L2_LKUP, 50, +- ds_cstr(&match), ds_cstr(&actions), +- &op->nbsp->header_); +- +- /* Add ethernet addresses specified in NAT rules on +- * distributed logical routers. */ +- if (op->peer->od->l3dgw_port +- && op->peer == op->peer->od->l3dgw_port) { +- for (int j = 0; j < op->peer->od->nbr->n_nat; j++) { +- const struct nbrec_nat *nat +- = op->peer->od->nbr->nat[j]; +- if (!strcmp(nat->type, "dnat_and_snat") +- && nat->logical_port && nat->external_mac +- && eth_addr_from_string(nat->external_mac, &mac)) { +- +- ds_clear(&match); +- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT +- " && is_chassis_resident(\"%s\")", +- ETH_ADDR_ARGS(mac), +- nat->logical_port); +- +- ds_clear(&actions); +- ds_put_format(&actions, "outport = %s; output;", +- op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_SWITCH_IN_L2_LKUP, 50, +- ds_cstr(&match), +- ds_cstr(&actions), +- &op->nbsp->header_); +- } +- } +- } +- } else { +- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); +- +- VLOG_INFO_RL(&rl, +- "%s: invalid syntax '%s' in addresses column", +- op->nbsp->name, op->nbsp->addresses[i]); +- } +- } +- } + + /* Ingress table 19: Destination lookup for unknown MACs (priority 0). 
*/ + HMAP_FOR_EACH (od, key_node, datapaths) { +@@ -7492,6 +7285,218 @@ build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od, + } + } + ++ ++/* Ingress table 19: Add IP multicast flows learnt from IGMP/MLD ++ * (priority 90). */ ++static void ++build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group, ++ struct hmap *lflows, ++ struct ds *actions, ++ struct ds *match) ++{ ++ if (igmp_group->datapath) { ++ ++ ds_clear(match); ++ ds_clear(actions); ++ ++ struct mcast_switch_info *mcast_sw_info = ++ &igmp_group->datapath->mcast_info.sw; ++ ++ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { ++ /* RFC 4541, section 2.1.2, item 2: Skip groups in the 224.0.0.X ++ * range. ++ */ ++ ovs_be32 group_address = ++ in6_addr_get_mapped_ipv4(&igmp_group->address); ++ if (ip_is_local_multicast(group_address)) { ++ return; ++ } ++ ++ if (mcast_sw_info->active_v4_flows >= mcast_sw_info->table_size) { ++ return; ++ } ++ mcast_sw_info->active_v4_flows++; ++ ds_put_format(match, "eth.mcast && ip4 && ip4.dst == %s ", ++ igmp_group->mcgroup.name); ++ } else { ++ /* RFC 4291, section 2.7.1: Skip groups that correspond to all ++ * hosts. ++ */ ++ if (ipv6_is_all_hosts(&igmp_group->address)) { ++ return; ++ } ++ if (mcast_sw_info->active_v6_flows >= mcast_sw_info->table_size) { ++ return; ++ } ++ mcast_sw_info->active_v6_flows++; ++ ds_put_format(match, "eth.mcast && ip6 && ip6.dst == %s ", ++ igmp_group->mcgroup.name); ++ } ++ ++ /* Also flood traffic to all multicast routers with relay enabled. 
*/ ++ if (mcast_sw_info->flood_relay) { ++ ds_put_cstr(actions, ++ "clone { " ++ "outport = \""MC_MROUTER_FLOOD "\"; " ++ "output; " ++ "};"); ++ } ++ if (mcast_sw_info->flood_static) { ++ ds_put_cstr(actions, ++ "clone { " ++ "outport =\""MC_STATIC"\"; " ++ "output; " ++ "};"); ++ } ++ ds_put_format(actions, "outport = \"%s\"; output; ", ++ igmp_group->mcgroup.name); ++ ++ ovn_lflow_add_unique(lflows, igmp_group->datapath, S_SWITCH_IN_L2_LKUP, ++ 90, ds_cstr(match), ds_cstr(actions)); ++ } ++} ++ ++/* Ingress table 19: Destination lookup, unicast handling (priority 50), */ ++static void ++build_lswitch_ip_unicast_lookup(struct ovn_port *op, ++ struct hmap *lflows, ++ struct hmap *mcgroups, ++ struct ds *actions, ++ struct ds *match) ++{ ++ if (op->nbsp && (!lsp_is_external(op->nbsp))) { ++ ++ /* For ports connected to logical routers add flows to bypass the ++ * broadcast flooding of ARP/ND requests in table 19. We direct the ++ * requests only to the router port that owns the IP address. ++ */ ++ if (lsp_is_router(op->nbsp)) { ++ build_lswitch_rport_arp_req_flows(op->peer, op->od, op, lflows, ++ &op->nbsp->header_); ++ } ++ ++ for (size_t i = 0; i < op->nbsp->n_addresses; i++) { ++ /* Addresses are owned by the logical port. ++ * Ethernet address followed by zero or more IPv4 ++ * or IPv6 addresses (or both). 
*/ ++ struct eth_addr mac; ++ if (ovs_scan(op->nbsp->addresses[i], ++ ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { ++ ds_clear(match); ++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT, ++ ETH_ADDR_ARGS(mac)); ++ ++ ds_clear(actions); ++ ds_put_format(actions, "outport = %s; output;", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_L2_LKUP, ++ 50, ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ } else if (!strcmp(op->nbsp->addresses[i], "unknown")) { ++ if (lsp_is_enabled(op->nbsp)) { ++ ovn_multicast_add(mcgroups, &mc_unknown, op); ++ op->od->has_unknown = true; ++ } ++ } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) { ++ if (!op->nbsp->dynamic_addresses ++ || !ovs_scan(op->nbsp->dynamic_addresses, ++ ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { ++ continue; ++ } ++ ds_clear(match); ++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT, ++ ETH_ADDR_ARGS(mac)); ++ ++ ds_clear(actions); ++ ds_put_format(actions, "outport = %s; output;", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_L2_LKUP, ++ 50, ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ } else if (!strcmp(op->nbsp->addresses[i], "router")) { ++ if (!op->peer || !op->peer->nbrp ++ || !ovs_scan(op->peer->nbrp->mac, ++ ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { ++ continue; ++ } ++ ds_clear(match); ++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT, ++ ETH_ADDR_ARGS(mac)); ++ if (op->peer->od->l3dgw_port ++ && op->peer->od->l3redirect_port ++ && op->od->n_localnet_ports) { ++ bool add_chassis_resident_check = false; ++ if (op->peer == op->peer->od->l3dgw_port) { ++ /* The peer of this port represents a distributed ++ * gateway port. The destination lookup flow for the ++ * router's distributed gateway port MAC address should ++ * only be programmed on the gateway chassis. */ ++ add_chassis_resident_check = true; ++ } else { ++ /* Check if the option 'reside-on-redirect-chassis' ++ * is set to true on the peer port. 
If set to true ++ * and if the logical switch has a localnet port, it ++ * means the router pipeline for the packets from ++ * this logical switch should be run on the chassis ++ * hosting the gateway port. ++ */ ++ add_chassis_resident_check = smap_get_bool( ++ &op->peer->nbrp->options, ++ "reside-on-redirect-chassis", false); ++ } ++ ++ if (add_chassis_resident_check) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->peer->od->l3redirect_port->json_key); ++ } ++ } ++ ++ ds_clear(actions); ++ ds_put_format(actions, "outport = %s; output;", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_L2_LKUP, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbsp->header_); ++ ++ /* Add ethernet addresses specified in NAT rules on ++ * distributed logical routers. */ ++ if (op->peer->od->l3dgw_port ++ && op->peer == op->peer->od->l3dgw_port) { ++ for (int j = 0; j < op->peer->od->nbr->n_nat; j++) { ++ const struct nbrec_nat *nat ++ = op->peer->od->nbr->nat[j]; ++ if (!strcmp(nat->type, "dnat_and_snat") ++ && nat->logical_port && nat->external_mac ++ && eth_addr_from_string(nat->external_mac, &mac)) { ++ ++ ds_clear(match); ++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT ++ " && is_chassis_resident(\"%s\")", ++ ETH_ADDR_ARGS(mac), ++ nat->logical_port); ++ ++ ds_clear(actions); ++ ds_put_format(actions, "outport = %s; output;", ++ op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_SWITCH_IN_L2_LKUP, 50, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ } ++ } ++ } ++ } else { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); ++ ++ VLOG_INFO_RL(&rl, ++ "%s: invalid syntax '%s' in addresses column", ++ op->nbsp->name, op->nbsp->addresses[i]); ++ } ++ } ++ } ++} ++ + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. 
+ * +@@ -11384,6 +11389,8 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, + &lsi->match); + build_lswitch_dhcp_options_and_response(op,lsi->lflows); + build_lswitch_external_port(op, lsi->lflows); ++ build_lswitch_ip_unicast_lookup(op, lsi->lflows, lsi->mcgroups, ++ &lsi->actions, &lsi->match); + + /* Build Logical Router Flows. */ + build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, +@@ -11412,6 +11419,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + struct ovn_northd_lb *lb; ++ struct ovn_igmp_group *igmp_group; + + char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); + +@@ -11443,14 +11451,19 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + &lsi.actions, + &lsi.match); + } ++ HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) { ++ build_lswitch_ip_mcast_igmp_mld(igmp_group, ++ lsi.lflows, ++ &lsi.actions, ++ &lsi.match); ++ } + free(svc_check_match); + + ds_destroy(&lsi.match); + ds_destroy(&lsi.actions); + + /* Legacy lswitch build - to be migrated. */ +- build_lswitch_flows(datapaths, ports, lflows, mcgroups, +- igmp_groups); ++ build_lswitch_flows(datapaths, ports, lflows); + + /* Legacy lrouter build - to be migrated. */ + build_lrouter_flows(datapaths, ports, lflows, meter_groups, lbs); +-- +2.29.2 + diff --git a/SOURCES/0008-ovn-northd-split-build_lswitch_output_port_sec-into-.patch b/SOURCES/0008-ovn-northd-split-build_lswitch_output_port_sec-into-.patch new file mode 100644 index 0000000..4982ad2 --- /dev/null +++ b/SOURCES/0008-ovn-northd-split-build_lswitch_output_port_sec-into-.patch @@ -0,0 +1,181 @@ +From a6b4b14ac1b6523f85fb13a7f259d9698a70444f Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:35 +0000 +Subject: [PATCH 08/16] ovn-northd: split build_lswitch_output_port_sec into + iterators. 
+ +Split build_lswitch_output_port_sec into a per port and per +datapath iterator. Migrate to the relevant per-port and +per-datapath loops. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 82 ++++++++++++++++++++------------------------- + 1 file changed, 37 insertions(+), 45 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 27a788095..92300e017 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -4917,51 +4917,47 @@ build_lswitch_input_port_sec_od( + } + } + ++/* Egress table 8: Egress port security - IP (priorities 90 and 80) ++ * if port security enabled. ++ * ++ * Egress table 9: Egress port security - L2 (priorities 50 and 150). ++ * ++ * Priority 50 rules implement port security for enabled logical port. ++ * ++ * Priority 150 rules drop packets to disabled logical ports, so that ++ * they don't even receive multicast or broadcast packets. ++ */ + static void +-build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths, +- struct hmap *lflows) ++build_lswitch_output_port_sec_op(struct ovn_port *op, ++ struct hmap *lflows, ++ struct ds *match, ++ struct ds *actions) + { +- struct ds actions = DS_EMPTY_INITIALIZER; +- struct ds match = DS_EMPTY_INITIALIZER; +- struct ovn_port *op; + +- /* Egress table 8: Egress port security - IP (priorities 90 and 80) +- * if port security enabled. +- * +- * Egress table 9: Egress port security - L2 (priorities 50 and 150). +- * +- * Priority 50 rules implement port security for enabled logical port. +- * +- * Priority 150 rules drop packets to disabled logical ports, so that +- * they don't even receive multicast or broadcast packets. 
+- */ +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbsp || lsp_is_external(op->nbsp)) { +- continue; +- } ++ if (op->nbsp && (!lsp_is_external(op->nbsp))) { + +- ds_clear(&actions); +- ds_clear(&match); ++ ds_clear(actions); ++ ds_clear(match); + +- ds_put_format(&match, "outport == %s", op->json_key); ++ ds_put_format(match, "outport == %s", op->json_key); + if (lsp_is_enabled(op->nbsp)) { + build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs, +- &match); ++ match); + + if (!strcmp(op->nbsp->type, "localnet")) { + const char *queue_id = smap_get(&op->sb->options, + "qdisc_queue_id"); + if (queue_id) { +- ds_put_format(&actions, "set_queue(%s); ", queue_id); ++ ds_put_format(actions, "set_queue(%s); ", queue_id); + } + } +- ds_put_cstr(&actions, "output;"); ++ ds_put_cstr(actions, "output;"); + ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, +- 50, ds_cstr(&match), ds_cstr(&actions), ++ 50, ds_cstr(match), ds_cstr(actions), + &op->nbsp->header_); + } else { + ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, +- 150, ds_cstr(&match), "drop;", ++ 150, ds_cstr(match), "drop;", + &op->nbsp->header_); + } + +@@ -4969,23 +4965,20 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths, + build_port_security_ip(P_OUT, op, lflows, &op->nbsp->header_); + } + } ++} + +- /* Egress tables 8: Egress port security - IP (priority 0) +- * Egress table 9: Egress port security L2 - multicast/broadcast +- * (priority 100). */ +- struct ovn_datapath *od; +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbs) { +- continue; +- } +- ++/* Egress tables 8: Egress port security - IP (priority 0) ++ * Egress table 9: Egress port security L2 - multicast/broadcast ++ * (priority 100). 
*/ ++static void ++build_lswitch_output_port_sec_od(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbs) { + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast", + "output;"); + } +- +- ds_destroy(&match); +- ds_destroy(&actions); + } + + static void +@@ -6768,8 +6761,7 @@ is_vlan_transparent(const struct ovn_datapath *od) + } + + static void +-build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, +- struct hmap *lflows) ++build_lswitch_flows(struct hmap *datapaths, struct hmap *lflows) + { + /* This flow table structure is documented in ovn-northd(8), so please + * update ovn-northd.8.xml if you change anything. */ +@@ -6790,8 +6782,6 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + } + } + +- build_lswitch_output_port_sec(ports, datapaths, lflows); +- + ds_destroy(&match); + ds_destroy(&actions); + } +@@ -11351,6 +11341,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_lswitch_dns_lookup_and_response(od, lsi->lflows); + build_lswitch_dhcp_and_dns_defaults(od, lsi->lflows); + build_lswitch_destination_lookup_bmcast(od, lsi->lflows, &lsi->actions); ++ build_lswitch_output_port_sec_od(od, lsi->lflows); + + /* Build Logical Router Flows. */ + build_adm_ctrl_flows_for_lrouter(od, lsi->lflows); +@@ -11391,6 +11382,8 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, + build_lswitch_external_port(op, lsi->lflows); + build_lswitch_ip_unicast_lookup(op, lsi->lflows, lsi->mcgroups, + &lsi->actions, &lsi->match); ++ build_lswitch_output_port_sec_op(op, lsi->lflows, ++ &lsi->actions, &lsi->match); + + /* Build Logical Router Flows. */ + build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match, +@@ -11462,8 +11455,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + ds_destroy(&lsi.match); + ds_destroy(&lsi.actions); + +- /* Legacy lswitch build - to be migrated. 
*/ +- build_lswitch_flows(datapaths, ports, lflows); ++ build_lswitch_flows(datapaths, lflows); + + /* Legacy lrouter build - to be migrated. */ + build_lrouter_flows(datapaths, ports, lflows, meter_groups, lbs); +-- +2.29.2 + diff --git a/SOURCES/0009-ovn-northd-Move-lrouter-arp-and-nd-datapath-processi.patch b/SOURCES/0009-ovn-northd-Move-lrouter-arp-and-nd-datapath-processi.patch new file mode 100644 index 0000000..2c011c7 --- /dev/null +++ b/SOURCES/0009-ovn-northd-Move-lrouter-arp-and-nd-datapath-processi.patch @@ -0,0 +1,140 @@ +From 34c2afc7d49f735e825e0d01bf1b2b64bb277f76 Mon Sep 17 00:00:00 2001 +Message-Id: <34c2afc7d49f735e825e0d01bf1b2b64bb277f76.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:36 +0000 +Subject: [PATCH 09/16] ovn-northd: Move lrouter arp and nd datapath processing + to a function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 96 +++++++++++++++++++++++---------------------- + 1 file changed, 50 insertions(+), 46 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 92300e017..7f7bb07be 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -8937,52 +8937,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_datapath *od; + struct ovn_port *op; + +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbr) { +- continue; +- } +- +- /* Priority-90-92 flows handle ARP requests and ND packets. Most are +- * per logical port but DNAT addresses can be handled per datapath +- * for non gateway router ports. +- * +- * Priority 91 and 92 flows are added for each gateway router +- * port to handle the special cases. In case we get the packet +- * on a regular port, just reply with the port's ETH address. 
+- */ +- for (int i = 0; i < od->nbr->n_nat; i++) { +- struct ovn_nat *nat_entry = &od->nat_entries[i]; +- +- /* Skip entries we failed to parse. */ +- if (!nat_entry_is_valid(nat_entry)) { +- continue; +- } +- +- /* Skip SNAT entries for now, we handle unique SNAT IPs separately +- * below. +- */ +- if (!strcmp(nat_entry->nb->type, "snat")) { +- continue; +- } +- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); +- } +- +- /* Now handle SNAT entries too, one per unique SNAT IP. */ +- struct shash_node *snat_snode; +- SHASH_FOR_EACH (snat_snode, &od->snat_ips) { +- struct ovn_snat_ip *snat_ip = snat_snode->data; +- +- if (ovs_list_is_empty(&snat_ip->snat_entries)) { +- continue; +- } +- +- struct ovn_nat *nat_entry = +- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), +- struct ovn_nat, ext_addr_list_node); +- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); +- } +- } +- + /* Logical router ingress table 3: IP Input for IPv4. */ + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbrp) { +@@ -11308,6 +11262,55 @@ build_ipv6_input_flows_for_lrouter_port( + + } + ++static void ++build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbr) { ++ ++ /* Priority-90-92 flows handle ARP requests and ND packets. Most are ++ * per logical port but DNAT addresses can be handled per datapath ++ * for non gateway router ports. ++ * ++ * Priority 91 and 92 flows are added for each gateway router ++ * port to handle the special cases. In case we get the packet ++ * on a regular port, just reply with the port's ETH address. ++ */ ++ for (int i = 0; i < od->nbr->n_nat; i++) { ++ struct ovn_nat *nat_entry = &od->nat_entries[i]; ++ ++ /* Skip entries we failed to parse. */ ++ if (!nat_entry_is_valid(nat_entry)) { ++ continue; ++ } ++ ++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately ++ * below. 
++ */ ++ if (!strcmp(nat_entry->nb->type, "snat")) { ++ continue; ++ } ++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); ++ } ++ ++ /* Now handle SNAT entries too, one per unique SNAT IP. */ ++ struct shash_node *snat_snode; ++ SHASH_FOR_EACH (snat_snode, &od->snat_ips) { ++ struct ovn_snat_ip *snat_ip = snat_snode->data; ++ ++ if (ovs_list_is_empty(&snat_ip->snat_entries)) { ++ continue; ++ } ++ ++ struct ovn_nat *nat_entry = ++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), ++ struct ovn_nat, ext_addr_list_node); ++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); ++ } ++ } ++} ++ ++ + struct lswitch_flow_build_info { + struct hmap *datapaths; + struct hmap *ports; +@@ -11360,6 +11363,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_arp_request_flows_for_lrouter(od, lsi->lflows, &lsi->match, + &lsi->actions); + build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows); ++ build_lrouter_arp_nd_for_datapath(od, lsi->lflows); + } + + /* Helper function to combine all lflow generation which is iterated by port. +-- +2.29.2 + diff --git a/SOURCES/0010-ovn-northd-Move-ipv4-input-to-a-function.patch b/SOURCES/0010-ovn-northd-Move-ipv4-input-to-a-function.patch new file mode 100644 index 0000000..d15e521 --- /dev/null +++ b/SOURCES/0010-ovn-northd-Move-ipv4-input-to-a-function.patch @@ -0,0 +1,556 @@ +From 761f760a42d97184c870e892d299587e657a2c52 Mon Sep 17 00:00:00 2001 +Message-Id: <761f760a42d97184c870e892d299587e657a2c52.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:37 +0000 +Subject: [PATCH 10/16] ovn-northd: Move ipv4 input to a function. 
+ +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 499 ++++++++++++++++++++++---------------------- + 1 file changed, 249 insertions(+), 250 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 7f7bb07be..f9b8d588b 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -8924,7 +8924,7 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, + } + + static void +-build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ++build_lrouter_flows(struct hmap *datapaths, + struct hmap *lflows, struct shash *meter_groups, + struct hmap *lbs) + { +@@ -8935,254 +8935,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct ds actions = DS_EMPTY_INITIALIZER; + + struct ovn_datapath *od; +- struct ovn_port *op; +- +- /* Logical router ingress table 3: IP Input for IPv4. */ +- HMAP_FOR_EACH (op, key_node, ports) { +- if (!op->nbrp) { +- continue; +- } +- +- if (op->derived) { +- /* No ingress packets are accepted on a chassisredirect +- * port, so no need to program flows for that port. */ +- continue; +- } +- +- if (op->lrp_networks.n_ipv4_addrs) { +- /* L3 admission control: drop packets that originate from an +- * IPv4 address owned by the router or a broadcast address +- * known to the router (priority 100). */ +- ds_clear(&match); +- ds_put_cstr(&match, "ip4.src == "); +- op_put_v4_networks(&match, op, true); +- ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, +- ds_cstr(&match), "drop;", +- &op->nbrp->header_); +- +- /* ICMP echo reply. These flows reply to ICMP echo requests +- * received for the router's IP address. Since packets only +- * get here as part of the logical router datapath, the inport +- * (i.e. the incoming locally attached net) does not matter. 
+- * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ +- ds_clear(&match); +- ds_put_cstr(&match, "ip4.dst == "); +- op_put_v4_networks(&match, op, false); +- ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0"); +- +- const char * icmp_actions = "ip4.dst <-> ip4.src; " +- "ip.ttl = 255; " +- "icmp4.type = 0; " +- "flags.loopback = 1; " +- "next; "; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, +- ds_cstr(&match), icmp_actions, +- &op->nbrp->header_); +- } +- +- /* ICMP time exceeded */ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- ds_clear(&match); +- ds_clear(&actions); +- +- ds_put_format(&match, +- "inport == %s && ip4 && " +- "ip.ttl == {0, 1} && !ip.later_frag", op->json_key); +- ds_put_format(&actions, +- "icmp4 {" +- "eth.dst <-> eth.src; " +- "icmp4.type = 11; /* Time exceeded */ " +- "icmp4.code = 0; /* TTL exceeded in transit */ " +- "ip4.dst = ip4.src; " +- "ip4.src = %s; " +- "ip.ttl = 255; " +- "next; };", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, +- ds_cstr(&match), ds_cstr(&actions), +- &op->nbrp->header_); +- } +- +- /* ARP reply. These flows reply to ARP requests for the router's own +- * IP address. */ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- ds_clear(&match); +- ds_put_format(&match, "arp.spa == %s/%u", +- op->lrp_networks.ipv4_addrs[i].network_s, +- op->lrp_networks.ipv4_addrs[i].plen); +- +- if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer +- && op->peer->od->n_localnet_ports) { +- bool add_chassis_resident_check = false; +- if (op == op->od->l3dgw_port) { +- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s +- * should only be sent from the gateway chassis, so that +- * upstream MAC learning points to the gateway chassis. +- * Also need to avoid generation of multiple ARP responses +- * from different chassis. 
*/ +- add_chassis_resident_check = true; +- } else { +- /* Check if the option 'reside-on-redirect-chassis' +- * is set to true on the router port. If set to true +- * and if peer's logical switch has a localnet port, it +- * means the router pipeline for the packets from +- * peer's logical switch is be run on the chassis +- * hosting the gateway port and it should reply to the +- * ARP requests for the router port IPs. +- */ +- add_chassis_resident_check = smap_get_bool( +- &op->nbrp->options, +- "reside-on-redirect-chassis", false); +- } +- +- if (add_chassis_resident_check) { +- ds_put_format(&match, " && is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); +- } +- } +- +- build_lrouter_arp_flow(op->od, op, +- op->lrp_networks.ipv4_addrs[i].addr_s, +- REG_INPORT_ETH_ADDR, &match, false, 90, +- &op->nbrp->header_, lflows); +- } +- +- /* A set to hold all load-balancer vips that need ARP responses. */ +- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); +- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); +- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); +- +- const char *ip_address; +- SSET_FOR_EACH (ip_address, &all_ips_v4) { +- ds_clear(&match); +- if (op == op->od->l3dgw_port) { +- ds_put_format(&match, "is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); +- } +- +- build_lrouter_arp_flow(op->od, op, +- ip_address, REG_INPORT_ETH_ADDR, +- &match, false, 90, NULL, lflows); +- } +- +- SSET_FOR_EACH (ip_address, &all_ips_v6) { +- ds_clear(&match); +- if (op == op->od->l3dgw_port) { +- ds_put_format(&match, "is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); +- } +- +- build_lrouter_nd_flow(op->od, op, "nd_na", +- ip_address, NULL, REG_INPORT_ETH_ADDR, +- &match, false, 90, NULL, lflows); +- } +- +- sset_destroy(&all_ips_v4); +- sset_destroy(&all_ips_v6); +- +- if (!smap_get(&op->od->nbr->options, "chassis") +- && !op->od->l3dgw_port) { +- /* UDP/TCP port unreachable. 
*/ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- ds_clear(&match); +- ds_put_format(&match, +- "ip4 && ip4.dst == %s && !ip.later_frag && udp", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- const char *action = "icmp4 {" +- "eth.dst <-> eth.src; " +- "ip4.dst <-> ip4.src; " +- "ip.ttl = 255; " +- "icmp4.type = 3; " +- "icmp4.code = 3; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 80, ds_cstr(&match), action, +- &op->nbrp->header_); +- +- ds_clear(&match); +- ds_put_format(&match, +- "ip4 && ip4.dst == %s && !ip.later_frag && tcp", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- action = "tcp_reset {" +- "eth.dst <-> eth.src; " +- "ip4.dst <-> ip4.src; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 80, ds_cstr(&match), action, +- &op->nbrp->header_); +- +- ds_clear(&match); +- ds_put_format(&match, +- "ip4 && ip4.dst == %s && !ip.later_frag", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- action = "icmp4 {" +- "eth.dst <-> eth.src; " +- "ip4.dst <-> ip4.src; " +- "ip.ttl = 255; " +- "icmp4.type = 3; " +- "icmp4.code = 2; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 70, ds_cstr(&match), action, +- &op->nbrp->header_); +- } +- } +- +- /* Drop IP traffic destined to router owned IPs except if the IP is +- * also a SNAT IP. Those are dropped later, in stage +- * "lr_in_arp_resolve", if unSNAT was unsuccessful. +- * +- * Priority 60. +- */ +- build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, +- lflows); +- +- /* ARP / ND handling for external IP addresses. +- * +- * DNAT and SNAT IP addresses are external IP addresses that need ARP +- * handling. +- * +- * These are already taken care globally, per router. The only +- * exception is on the l3dgw_port where we might need to use a +- * different ETH address. 
+- */ +- if (op != op->od->l3dgw_port) { +- continue; +- } +- +- for (size_t i = 0; i < op->od->nbr->n_nat; i++) { +- struct ovn_nat *nat_entry = &op->od->nat_entries[i]; +- +- /* Skip entries we failed to parse. */ +- if (!nat_entry_is_valid(nat_entry)) { +- continue; +- } +- +- /* Skip SNAT entries for now, we handle unique SNAT IPs separately +- * below. +- */ +- if (!strcmp(nat_entry->nb->type, "snat")) { +- continue; +- } +- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); +- } +- +- /* Now handle SNAT entries too, one per unique SNAT IP. */ +- struct shash_node *snat_snode; +- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { +- struct ovn_snat_ip *snat_ip = snat_snode->data; +- +- if (ovs_list_is_empty(&snat_ip->snat_entries)) { +- continue; +- } +- +- struct ovn_nat *nat_entry = +- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), +- struct ovn_nat, ext_addr_list_node); +- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); +- } +- } + + /* NAT, Defrag and load balancing. */ + HMAP_FOR_EACH (od, key_node, datapaths) { +@@ -11310,6 +11062,251 @@ build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od, + } + } + ++/* Logical router ingress table 3: IP Input for IPv4. */ ++static void ++build_lrouter_ipv4_ip_input(struct ovn_port *op, ++ struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ /* No ingress packets are accepted on a chassisredirect ++ * port, so no need to program flows for that port. */ ++ if (op->nbrp && (!op->derived)) { ++ if (op->lrp_networks.n_ipv4_addrs) { ++ /* L3 admission control: drop packets that originate from an ++ * IPv4 address owned by the router or a broadcast address ++ * known to the router (priority 100). 
*/ ++ ds_clear(match); ++ ds_put_cstr(match, "ip4.src == "); ++ op_put_v4_networks(match, op, true); ++ ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ++ ds_cstr(match), "drop;", ++ &op->nbrp->header_); ++ ++ /* ICMP echo reply. These flows reply to ICMP echo requests ++ * received for the router's IP address. Since packets only ++ * get here as part of the logical router datapath, the inport ++ * (i.e. the incoming locally attached net) does not matter. ++ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ ++ ds_clear(match); ++ ds_put_cstr(match, "ip4.dst == "); ++ op_put_v4_networks(match, op, false); ++ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0"); ++ ++ const char * icmp_actions = "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 0; " ++ "flags.loopback = 1; " ++ "next; "; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, ++ ds_cstr(match), icmp_actions, ++ &op->nbrp->header_); ++ } ++ ++ /* ICMP time exceeded */ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_clear(actions); ++ ++ ds_put_format(match, ++ "inport == %s && ip4 && " ++ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key); ++ ds_put_format(actions, ++ "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "icmp4.type = 11; /* Time exceeded */ " ++ "icmp4.code = 0; /* TTL exceeded in transit */ " ++ "ip4.dst = ip4.src; " ++ "ip4.src = %s; " ++ "ip.ttl = 255; " ++ "next; };", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++ ++ /* ARP reply. These flows reply to ARP requests for the router's own ++ * IP address. 
*/ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, "arp.spa == %s/%u", ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen); ++ ++ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer ++ && op->peer->od->n_localnet_ports) { ++ bool add_chassis_resident_check = false; ++ if (op == op->od->l3dgw_port) { ++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s ++ * should only be sent from the gateway chassis, so that ++ * upstream MAC learning points to the gateway chassis. ++ * Also need to avoid generation of multiple ARP responses ++ * from different chassis. */ ++ add_chassis_resident_check = true; ++ } else { ++ /* Check if the option 'reside-on-redirect-chassis' ++ * is set to true on the router port. If set to true ++ * and if peer's logical switch has a localnet port, it ++ * means the router pipeline for the packets from ++ * peer's logical switch is be run on the chassis ++ * hosting the gateway port and it should reply to the ++ * ARP requests for the router port IPs. ++ */ ++ add_chassis_resident_check = smap_get_bool( ++ &op->nbrp->options, ++ "reside-on-redirect-chassis", false); ++ } ++ ++ if (add_chassis_resident_check) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); ++ } ++ } ++ ++ build_lrouter_arp_flow(op->od, op, ++ op->lrp_networks.ipv4_addrs[i].addr_s, ++ REG_INPORT_ETH_ADDR, match, false, 90, ++ &op->nbrp->header_, lflows); ++ } ++ ++ /* A set to hold all load-balancer vips that need ARP responses. 
*/ ++ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); ++ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); ++ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); ++ ++ const char *ip_address; ++ SSET_FOR_EACH (ip_address, &all_ips_v4) { ++ ds_clear(match); ++ if (op == op->od->l3dgw_port) { ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); ++ } ++ ++ build_lrouter_arp_flow(op->od, op, ++ ip_address, REG_INPORT_ETH_ADDR, ++ match, false, 90, NULL, lflows); ++ } ++ ++ SSET_FOR_EACH (ip_address, &all_ips_v6) { ++ ds_clear(match); ++ if (op == op->od->l3dgw_port) { ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); ++ } ++ ++ build_lrouter_nd_flow(op->od, op, "nd_na", ++ ip_address, NULL, REG_INPORT_ETH_ADDR, ++ match, false, 90, NULL, lflows); ++ } ++ ++ sset_destroy(&all_ips_v4); ++ sset_destroy(&all_ips_v6); ++ ++ if (!smap_get(&op->od->nbr->options, "chassis") ++ && !op->od->l3dgw_port) { ++ /* UDP/TCP port unreachable. 
*/ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && udp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ const char *action = "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; " ++ "icmp4.code = 3; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); ++ ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && tcp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "tcp_reset {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); ++ ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; " ++ "icmp4.code = 2; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 70, ds_cstr(match), action, ++ &op->nbrp->header_); ++ } ++ } ++ ++ /* Drop IP traffic destined to router owned IPs except if the IP is ++ * also a SNAT IP. Those are dropped later, in stage ++ * "lr_in_arp_resolve", if unSNAT was unsuccessful. ++ * ++ * Priority 60. ++ */ ++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, ++ lflows); ++ ++ /* ARP / ND handling for external IP addresses. ++ * ++ * DNAT and SNAT IP addresses are external IP addresses that need ARP ++ * handling. ++ * ++ * These are already taken care globally, per router. The only ++ * exception is on the l3dgw_port where we might need to use a ++ * different ETH address. 
++ */ ++ if (op != op->od->l3dgw_port) { ++ return; ++ } ++ ++ for (size_t i = 0; i < op->od->nbr->n_nat; i++) { ++ struct ovn_nat *nat_entry = &op->od->nat_entries[i]; ++ ++ /* Skip entries we failed to parse. */ ++ if (!nat_entry_is_valid(nat_entry)) { ++ continue; ++ } ++ ++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately ++ * below. ++ */ ++ if (!strcmp(nat_entry->nb->type, "snat")) { ++ continue; ++ } ++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); ++ } ++ ++ /* Now handle SNAT entries too, one per unique SNAT IP. */ ++ struct shash_node *snat_snode; ++ SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { ++ struct ovn_snat_ip *snat_ip = snat_snode->data; ++ ++ if (ovs_list_is_empty(&snat_ip->snat_entries)) { ++ continue; ++ } ++ ++ struct ovn_nat *nat_entry = ++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), ++ struct ovn_nat, ext_addr_list_node); ++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); ++ } ++ } ++} ++ + + struct lswitch_flow_build_info { + struct hmap *datapaths; +@@ -11404,6 +11401,8 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op, + build_dhcpv6_reply_flows_for_lrouter_port(op, lsi->lflows, &lsi->match); + build_ipv6_input_flows_for_lrouter_port(op, lsi->lflows, + &lsi->match, &lsi->actions); ++ build_lrouter_ipv4_ip_input(op, lsi->lflows, ++ &lsi->match, &lsi->actions); + } + + static void +@@ -11462,7 +11461,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + build_lswitch_flows(datapaths, lflows); + + /* Legacy lrouter build - to be migrated. 
*/ +- build_lrouter_flows(datapaths, ports, lflows, meter_groups, lbs); ++ build_lrouter_flows(datapaths, lflows, meter_groups, lbs); + } + + struct ovn_dp_group { +-- +2.29.2 + diff --git a/SOURCES/0011-ovn-northd-move-NAT-Defrag-and-lb-to-a-function.patch b/SOURCES/0011-ovn-northd-move-NAT-Defrag-and-lb-to-a-function.patch new file mode 100644 index 0000000..a27b30b --- /dev/null +++ b/SOURCES/0011-ovn-northd-move-NAT-Defrag-and-lb-to-a-function.patch @@ -0,0 +1,4489 @@ +From 7699c1043a3fec9eb215fc430202ca01846c505e Mon Sep 17 00:00:00 2001 +Message-Id: <7699c1043a3fec9eb215fc430202ca01846c505e.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Anton Ivanov +Date: Tue, 5 Jan 2021 17:49:38 +0000 +Subject: [PATCH 11/16] ovn-northd: move NAT, Defrag and lb to a function. + +Signed-off-by: Anton Ivanov +Signed-off-by: Numan Siddique +Signed-off-by: Lorenzo Bianconi +--- + northd/ovn-northd.c | 4128 +++++++++++++++++++++---------------------- + 1 file changed, 2058 insertions(+), 2070 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index f9b8d588b..f588d8c32 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -8923,2391 +8923,2380 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, + ds_destroy(&actions); + } + ++/* Logical router ingress Table 0: L2 Admission Control ++ * Generic admission control flows (without inport check). ++ */ + static void +-build_lrouter_flows(struct hmap *datapaths, +- struct hmap *lflows, struct shash *meter_groups, +- struct hmap *lbs) ++build_adm_ctrl_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows) + { +- /* This flow table structure is documented in ovn-northd(8), so please +- * update ovn-northd.8.xml if you change anything. */ +- +- struct ds match = DS_EMPTY_INITIALIZER; +- struct ds actions = DS_EMPTY_INITIALIZER; ++ if (od->nbr) { ++ /* Logical VLANs not supported. ++ * Broadcast/multicast source address is invalid. 
*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, ++ "vlan.present || eth.src[40]", "drop;"); ++ } ++} + +- struct ovn_datapath *od; ++/* Logical router ingress Table 0: L2 Admission Control ++ * This table drops packets that the router shouldn’t see at all based ++ * on their Ethernet headers. ++ */ ++static void ++build_adm_ctrl_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (op->nbrp) { ++ if (!lrport_is_enabled(op->nbrp)) { ++ /* Drop packets from disabled logical ports (since logical flow ++ * tables are default-drop). */ ++ return; ++ } + +- /* NAT, Defrag and load balancing. */ +- HMAP_FOR_EACH (od, key_node, datapaths) { +- if (!od->nbr) { +- continue; ++ if (op->derived) { ++ /* No ingress packets should be received on a chassisredirect ++ * port. */ ++ return; + } + +- /* Packets are allowed by default. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); ++ /* Store the ethernet address of the port receiving the packet. ++ * This will save us from having to match on inport further down in ++ * the pipeline. ++ */ ++ ds_clear(actions); ++ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", ++ op->lrp_networks.ea_s); + +- /* Send the IPv6 NS packets to next table. When ovn-controller +- * generates IPv6 NS (for the action - nd_ns{}), the injected +- * packet would go through conntrack - which is not required. 
*/ +- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); ++ ds_clear(match); ++ ds_put_format(match, "eth.mcast && inport == %s", op->json_key); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); + +- /* NAT rules are only valid on Gateway routers and routers with +- * l3dgw_port (router has a port with gateway chassis +- * specified). */ +- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { +- continue; ++ ds_clear(match); ++ ds_put_format(match, "eth.dst == %s && inport == %s", ++ op->lrp_networks.ea_s, op->json_key); ++ if (op->od->l3dgw_port && op == op->od->l3dgw_port ++ && op->od->l3redirect_port) { ++ /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s ++ * should only be received on the gateway chassis. */ ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++} + +- struct sset nat_entries = SSET_INITIALIZER(&nat_entries); + +- bool dnat_force_snat_ip = +- !lport_addresses_is_empty(&od->dnat_force_snat_addrs); +- bool lb_force_snat_ip = +- !lport_addresses_is_empty(&od->lb_force_snat_addrs); ++/* Logical router ingress Table 1 and 2: Neighbor lookup and learning ++ * lflows for logical routers. */ ++static void ++build_neigh_learning_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (od->nbr) { + +- for (int i = 0; i < od->nbr->n_nat; i++) { +- const struct nbrec_nat *nat; ++ /* Learn MAC bindings from ARP/IPv6 ND. ++ * ++ * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the ++ * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp' ++ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit. 
++ * If "always_learn_from_arp_request" is set to false, it will also ++ * lookup for the (arp.spa) in the mac binding table using the ++ * "lookup_arp_ip" action for ARP request packets, and stores the ++ * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit ++ * to "1" directly for ARP response packets. ++ * ++ * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup ++ * for the (nd.target, nd.tll) in the mac binding table using the ++ * 'lookup_nd' action and stores the result in ++ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If ++ * "always_learn_from_arp_request" is set to false, ++ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set. ++ * ++ * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup ++ * for the (ip6.src, nd.sll) in the mac binding table using the ++ * 'lookup_nd' action and stores the result in ++ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If ++ * "always_learn_from_arp_request" is set to false, it will also lookup ++ * for the (ip6.src) in the mac binding table using the "lookup_nd_ip" ++ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT ++ * bit. ++ * ++ * Table LEARN_NEIGHBOR learns the mac-binding using the action ++ * - 'put_arp/put_nd'. Learning mac-binding is skipped if ++ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or ++ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set. ++ * ++ * */ + +- nat = od->nbr->nat[i]; ++ /* Flows for LOOKUP_NEIGHBOR. */ ++ bool learn_from_arp_request = smap_get_bool(&od->nbr->options, ++ "always_learn_from_arp_request", true); ++ ds_clear(actions); ++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT ++ " = lookup_arp(inport, arp.spa, arp.sha); %snext;", ++ learn_from_arp_request ? 
"" : ++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, ++ "arp.op == 2", ds_cstr(actions)); + +- ovs_be32 ip, mask; +- struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; +- bool is_v6 = false; +- bool stateless = lrouter_nat_is_stateless(nat); +- struct nbrec_address_set *allowed_ext_ips = +- nat->allowed_ext_ips; +- struct nbrec_address_set *exempted_ext_ips = +- nat->exempted_ext_ips; ++ ds_clear(actions); ++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT ++ " = lookup_nd(inport, nd.target, nd.tll); %snext;", ++ learn_from_arp_request ? "" : ++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na", ++ ds_cstr(actions)); + +- if (allowed_ext_ips && exempted_ext_ips) { +- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); +- VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since " +- "both allowed and exempt external ips set", +- UUID_ARGS(&(nat->header_.uuid))); +- continue; +- } ++ ds_clear(actions); ++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT ++ " = lookup_nd(inport, ip6.src, nd.sll); %snext;", ++ learn_from_arp_request ? "" : ++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT ++ " = lookup_nd_ip(inport, ip6.src); "); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns", ++ ds_cstr(actions)); + +- char *error = ip_parse_masked(nat->external_ip, &ip, &mask); +- if (error || mask != OVS_BE32_MAX) { +- free(error); +- error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); +- if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) { +- /* Invalid for both IPv4 and IPv6 */ +- static struct vlog_rate_limit rl = +- VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "bad external ip %s for nat", +- nat->external_ip); +- free(error); +- continue; +- } +- /* It was an invalid IPv4 address, but valid IPv6. +- * Treat the rest of the handling of this NAT rule +- * as IPv6. 
*/ +- is_v6 = true; +- } ++ /* For other packet types, we can skip neighbor learning. ++ * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1", ++ REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;"); + +- /* Check the validity of nat->logical_ip. 'logical_ip' can +- * be a subnet when the type is "snat". */ +- int cidr_bits; +- if (is_v6) { +- error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6); +- cidr_bits = ipv6_count_cidr_bits(&mask_v6); +- } else { +- error = ip_parse_masked(nat->logical_ip, &ip, &mask); +- cidr_bits = ip_count_cidr_bits(mask); +- } +- if (!strcmp(nat->type, "snat")) { +- if (error) { +- /* Invalid for both IPv4 and IPv6 */ +- static struct vlog_rate_limit rl = +- VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " +- "in router "UUID_FMT"", +- nat->logical_ip, UUID_ARGS(&od->key)); +- free(error); +- continue; +- } +- } else { +- if (error || (!is_v6 && mask != OVS_BE32_MAX) +- || (is_v6 && memcmp(&mask_v6, &v6_exact, +- sizeof mask_v6))) { +- /* Invalid for both IPv4 and IPv6 */ +- static struct vlog_rate_limit rl = +- VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " +- ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); +- free(error); +- continue; +- } +- } ++ /* Flows for LEARN_NEIGHBOR. */ ++ /* Skip Neighbor learning if not required. */ ++ ds_clear(match); ++ ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s", ++ learn_from_arp_request ? "" : ++ " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100, ++ ds_cstr(match), "next;"); + +- /* For distributed router NAT, determine whether this NAT rule +- * satisfies the conditions for distributed NAT processing. 
*/ +- bool distributed = false; +- struct eth_addr mac; +- if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && +- nat->logical_port && nat->external_mac) { +- if (eth_addr_from_string(nat->external_mac, &mac)) { +- distributed = true; +- } else { +- static struct vlog_rate_limit rl = +- VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " +- ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); +- continue; +- } +- } ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, ++ "arp", "put_arp(inport, arp.spa, arp.sha); next;"); + +- /* Ingress UNSNAT table: It is for already established connections' +- * reverse traffic. i.e., SNAT has already been done in egress +- * pipeline and now the packet has entered the ingress pipeline as +- * part of a reply. We undo the SNAT here. +- * +- * Undoing SNAT has to happen before DNAT processing. This is +- * because when the packet was DNATed in ingress pipeline, it did +- * not know about the possibility of eventual additional SNAT in +- * egress pipeline. */ +- if (!strcmp(nat->type, "snat") +- || !strcmp(nat->type, "dnat_and_snat")) { +- if (!od->l3dgw_port) { +- /* Gateway router. */ +- ds_clear(&match); +- ds_clear(&actions); +- ds_put_format(&match, "ip && ip%s.dst == %s", +- is_v6 ? "6" : "4", +- nat->external_ip); +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "ip%s.dst=%s; next;", +- is_v6 ? "6" : "4", nat->logical_ip); +- } else { +- ds_put_cstr(&actions, "ct_snat;"); +- } ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, ++ "nd_na", "put_nd(inport, nd.target, nd.tll); next;"); + +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, +- 90, ds_cstr(&match), +- ds_cstr(&actions), +- &nat->header_); +- } else { +- /* Distributed router. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, ++ "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;"); ++ } + +- /* Traffic received on l3dgw_port is subject to NAT. 
*/ +- ds_clear(&match); +- ds_clear(&actions); +- ds_put_format(&match, "ip && ip%s.dst == %s" +- " && inport == %s", +- is_v6 ? "6" : "4", +- nat->external_ip, +- od->l3dgw_port->json_key); +- if (!distributed && od->l3redirect_port) { +- /* Flows for NAT rules that are centralized are only +- * programmed on the gateway chassis. */ +- ds_put_format(&match, " && is_chassis_resident(%s)", +- od->l3redirect_port->json_key); +- } ++} + +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "ip%s.dst=%s; next;", +- is_v6 ? "6" : "4", nat->logical_ip); +- } else { +- ds_put_cstr(&actions, "ct_snat;"); +- } ++/* Logical router ingress Table 1: Neighbor lookup lflows ++ * for logical router ports. */ ++static void ++build_neigh_learning_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (op->nbrp) { + +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, +- 100, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); ++ bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options, ++ "always_learn_from_arp_request", true); ++ ++ /* Check if we need to learn mac-binding from ARP requests. */ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ if (!learn_from_arp_request) { ++ /* ARP request to this address should always get learned, ++ * so add a priority-110 flow to set ++ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. 
*/ ++ ds_clear(match); ++ ds_put_format(match, ++ "inport == %s && arp.spa == %s/%u && " ++ "arp.tpa == %s && arp.op == 1", ++ op->json_key, ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen, ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ if (op->od->l3dgw_port && op == op->od->l3dgw_port ++ && op->od->l3redirect_port) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } ++ const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT ++ " = lookup_arp(inport, arp.spa, arp.sha); " ++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;" ++ " next;"; ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_ROUTER_IN_LOOKUP_NEIGHBOR, 110, ++ ds_cstr(match), actions_s, ++ &op->nbrp->header_); ++ } ++ ds_clear(match); ++ ds_put_format(match, ++ "inport == %s && arp.spa == %s/%u && arp.op == 1", ++ op->json_key, ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen); ++ if (op->od->l3dgw_port && op == op->od->l3dgw_port ++ && op->od->l3redirect_port) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } ++ ds_clear(actions); ++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT ++ " = lookup_arp(inport, arp.spa, arp.sha); %snext;", ++ learn_from_arp_request ? "" : ++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT ++ " = lookup_arp_ip(inport, arp.spa); "); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++ } ++} + +- /* Ingress DNAT table: Packets enter the pipeline with destination +- * IP address that needs to be DNATted from a external IP address +- * to a logical IP address. */ +- if (!strcmp(nat->type, "dnat") +- || !strcmp(nat->type, "dnat_and_snat")) { +- if (!od->l3dgw_port) { +- /* Gateway router. */ +- /* Packet when it goes from the initiator to destination. 
+- * We need to set flags.loopback because the router can +- * send the packet back through the same interface. */ +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.dst == %s", +- is_v6 ? "6" : "4", +- nat->external_ip); +- ds_clear(&actions); +- if (allowed_ext_ips || exempted_ext_ips) { +- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, +- is_v6, true, mask); +- } ++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router ++ * Adv (RA) options and response. */ ++static void ++build_ND_RA_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (!op->nbrp || op->nbrp->peer || !op->peer) { ++ return; ++ } + +- if (dnat_force_snat_ip) { +- /* Indicate to the future tables that a DNAT has taken +- * place and a force SNAT needs to be done in the +- * Egress SNAT table. */ +- ds_put_format(&actions, +- "flags.force_snat_for_dnat = 1; "); +- } ++ if (!op->lrp_networks.n_ipv6_addrs) { ++ return; ++ } + +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "flags.loopback = 1; " +- "ip%s.dst=%s; next;", +- is_v6 ? "6" : "4", nat->logical_ip); +- } else { +- ds_put_format(&actions, "flags.loopback = 1; " +- "ct_dnat(%s", nat->logical_ip); ++ struct smap options; ++ smap_clone(&options, &op->sb->options); + +- if (nat->external_port_range[0]) { +- ds_put_format(&actions, ",%s", +- nat->external_port_range); +- } +- ds_put_format(&actions, ");"); +- } ++ /* enable IPv6 prefix delegation */ ++ bool prefix_delegation = smap_get_bool(&op->nbrp->options, ++ "prefix_delegation", false); ++ if (!lrport_is_enabled(op->nbrp)) { ++ prefix_delegation = false; ++ } ++ smap_add(&options, "ipv6_prefix_delegation", ++ prefix_delegation ? "true" : "false"); + +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); +- } else { +- /* Distributed router. 
*/ ++ bool ipv6_prefix = smap_get_bool(&op->nbrp->options, ++ "prefix", false); ++ if (!lrport_is_enabled(op->nbrp)) { ++ ipv6_prefix = false; ++ } ++ smap_add(&options, "ipv6_prefix", ++ ipv6_prefix ? "true" : "false"); ++ sbrec_port_binding_set_options(op->sb, &options); + +- /* Traffic received on l3dgw_port is subject to NAT. */ +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.dst == %s" +- " && inport == %s", +- is_v6 ? "6" : "4", +- nat->external_ip, +- od->l3dgw_port->json_key); +- if (!distributed && od->l3redirect_port) { +- /* Flows for NAT rules that are centralized are only +- * programmed on the gateway chassis. */ +- ds_put_format(&match, " && is_chassis_resident(%s)", +- od->l3redirect_port->json_key); +- } +- ds_clear(&actions); +- if (allowed_ext_ips || exempted_ext_ips) { +- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, +- is_v6, true, mask); +- } ++ smap_destroy(&options); + +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "ip%s.dst=%s; next;", +- is_v6 ? "6" : "4", nat->logical_ip); +- } else { +- ds_put_format(&actions, "ct_dnat(%s", nat->logical_ip); +- if (nat->external_port_range[0]) { +- ds_put_format(&actions, ",%s", +- nat->external_port_range); +- } +- ds_put_format(&actions, ");"); +- } ++ const char *address_mode = smap_get( ++ &op->nbrp->ipv6_ra_configs, "address_mode"); + +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); +- } +- } ++ if (!address_mode) { ++ return; ++ } ++ if (strcmp(address_mode, "slaac") && ++ strcmp(address_mode, "dhcpv6_stateful") && ++ strcmp(address_mode, "dhcpv6_stateless")) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ++ VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined", ++ address_mode); ++ return; ++ } + +- /* ARP resolve for NAT IPs. 
*/ +- if (od->l3dgw_port) { +- if (!strcmp(nat->type, "snat")) { +- ds_clear(&match); +- ds_put_format( +- &match, "inport == %s && %s == %s", +- od->l3dgw_port->json_key, +- is_v6 ? "ip6.src" : "ip4.src", nat->external_ip); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, +- 120, ds_cstr(&match), "next;", +- &nat->header_); +- } ++ if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic", ++ false)) { ++ copy_ra_to_sb(op, address_mode); ++ } + +- if (!sset_contains(&nat_entries, nat->external_ip)) { +- ds_clear(&match); +- ds_put_format( +- &match, "outport == %s && %s == %s", +- od->l3dgw_port->json_key, +- is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4, +- nat->external_ip); +- ds_clear(&actions); +- ds_put_format( +- &actions, "eth.dst = %s; next;", +- distributed ? nat->external_mac : +- od->l3dgw_port->lrp_networks.ea_s); +- ovn_lflow_add_with_hint(lflows, od, +- S_ROUTER_IN_ARP_RESOLVE, +- 100, ds_cstr(&match), +- ds_cstr(&actions), +- &nat->header_); +- sset_add(&nat_entries, nat->external_ip); +- } +- } else { +- /* Add the NAT external_ip to the nat_entries even for +- * gateway routers. This is required for adding load balancer +- * flows.*/ +- sset_add(&nat_entries, nat->external_ip); +- } ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs", ++ op->json_key); ++ ds_clear(actions); + +- /* Egress UNDNAT table: It is for already established connections' +- * reverse traffic. i.e., DNAT has already been done in ingress +- * pipeline and now the packet has entered the egress pipeline as +- * part of a reply. We undo the DNAT here. +- * +- * Note that this only applies for NAT on a distributed router. +- * Undo DNAT on a gateway router is done in the ingress DNAT +- * pipeline stage. */ +- if (od->l3dgw_port && (!strcmp(nat->type, "dnat") +- || !strcmp(nat->type, "dnat_and_snat"))) { +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.src == %s" +- " && outport == %s", +- is_v6 ? 
"6" : "4", +- nat->logical_ip, +- od->l3dgw_port->json_key); +- if (!distributed && od->l3redirect_port) { +- /* Flows for NAT rules that are centralized are only +- * programmed on the gateway chassis. */ +- ds_put_format(&match, " && is_chassis_resident(%s)", +- od->l3redirect_port->json_key); +- } +- ds_clear(&actions); +- if (distributed) { +- ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", +- ETH_ADDR_ARGS(mac)); +- } ++ const char *mtu_s = smap_get( ++ &op->nbrp->ipv6_ra_configs, "mtu"); + +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "ip%s.src=%s; next;", +- is_v6 ? "6" : "4", nat->external_ip); +- } else { +- ds_put_format(&actions, "ct_dnat;"); +- } ++ /* As per RFC 2460, 1280 is minimum IPv6 MTU. */ ++ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0; + +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); +- } ++ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts(" ++ "addr_mode = \"%s\", slla = %s", ++ address_mode, op->lrp_networks.ea_s); ++ if (mtu > 0) { ++ ds_put_format(actions, ", mtu = %u", mtu); ++ } + +- /* Egress SNAT table: Packets enter the egress pipeline with +- * source ip address that needs to be SNATted to a external ip +- * address. */ +- if (!strcmp(nat->type, "snat") +- || !strcmp(nat->type, "dnat_and_snat")) { +- if (!od->l3dgw_port) { +- /* Gateway router. */ +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.src == %s", +- is_v6 ? 
"6" : "4", +- nat->logical_ip); +- ds_clear(&actions); ++ const char *prf = smap_get_def( ++ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM"); ++ if (strcmp(prf, "MEDIUM")) { ++ ds_put_format(actions, ", router_preference = \"%s\"", prf); ++ } + +- if (allowed_ext_ips || exempted_ext_ips) { +- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, +- is_v6, false, mask); +- } ++ bool add_rs_response_flow = false; + +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "ip%s.src=%s; next;", +- is_v6 ? "6" : "4", nat->external_ip); +- } else { +- ds_put_format(&actions, "ct_snat(%s", +- nat->external_ip); ++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { ++ continue; ++ } + +- if (nat->external_port_range[0]) { +- ds_put_format(&actions, ",%s", +- nat->external_port_range); +- } +- ds_put_format(&actions, ");"); +- } ++ ds_put_format(actions, ", prefix = %s/%u", ++ op->lrp_networks.ipv6_addrs[i].network_s, ++ op->lrp_networks.ipv6_addrs[i].plen); + +- /* The priority here is calculated such that the +- * nat->logical_ip with the longest mask gets a higher +- * priority. */ +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, +- cidr_bits + 1, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); +- } else { +- uint16_t priority = cidr_bits + 1; ++ add_rs_response_flow = true; ++ } + +- /* Distributed router. */ +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.src == %s" +- " && outport == %s", +- is_v6 ? "6" : "4", +- nat->logical_ip, +- od->l3dgw_port->json_key); +- if (!distributed && od->l3redirect_port) { +- /* Flows for NAT rules that are centralized are only +- * programmed on the gateway chassis. 
*/ +- priority += 128; +- ds_put_format(&match, " && is_chassis_resident(%s)", +- od->l3redirect_port->json_key); +- } +- ds_clear(&actions); ++ if (add_rs_response_flow) { ++ ds_put_cstr(actions, "); next;"); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, ++ 50, ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ ds_clear(actions); ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && " ++ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key); + +- if (allowed_ext_ips || exempted_ext_ips) { +- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat, +- is_v6, false, mask); +- } ++ char ip6_str[INET6_ADDRSTRLEN + 1]; ++ struct in6_addr lla; ++ in6_generate_lla(op->lrp_networks.ea, &lla); ++ memset(ip6_str, 0, sizeof(ip6_str)); ++ ipv6_string_mapped(ip6_str, &lla); ++ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; " ++ "ip6.dst = ip6.src; ip6.src = %s; " ++ "outport = inport; flags.loopback = 1; " ++ "output;", ++ op->lrp_networks.ea_s, ip6_str); ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_ROUTER_IN_ND_RA_RESPONSE, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++} + +- if (distributed) { +- ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", +- ETH_ADDR_ARGS(mac)); +- } ++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS ++ * responder, by default goto next. (priority 0). */ ++static void ++build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows) ++{ ++ if (od->nbr) { ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); ++ } ++} + +- if (!strcmp(nat->type, "dnat_and_snat") && stateless) { +- ds_put_format(&actions, "ip%s.src=%s; next;", +- is_v6 ? 
"6" : "4", nat->external_ip); +- } else { +- ds_put_format(&actions, "ct_snat(%s", +- nat->external_ip); +- if (nat->external_port_range[0]) { +- ds_put_format(&actions, ",%s", +- nat->external_port_range); +- } +- ds_put_format(&actions, ");"); +- } ++/* Logical router ingress table IP_ROUTING : IP Routing. ++ * ++ * A packet that arrives at this table is an IP packet that should be ++ * routed to the address in 'ip[46].dst'. ++ * ++ * For regular routes without ECMP, table IP_ROUTING sets outport to the ++ * correct output port, eth.src to the output port's MAC address, and ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address ++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and ++ * advances to the next table. ++ * ++ * For ECMP routes, i.e. multiple routes with same policy and prefix, table ++ * IP_ROUTING remembers ECMP group id and selects a member id, and advances ++ * to table IP_ROUTING_ECMP, which sets outport, eth.src and ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member. ++ */ ++static void ++build_ip_routing_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows) ++{ ++ if (op->nbrp) { + +- /* The priority here is calculated such that the +- * nat->logical_ip with the longest mask gets a higher +- * priority. */ +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, +- priority, ds_cstr(&match), +- ds_cstr(&actions), +- &nat->header_); +- } +- } ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen, NULL, false, ++ &op->nbrp->header_); ++ } + +- /* Logical router ingress table 0: +- * For NAT on a distributed router, add rules allowing +- * ingress traffic with eth.dst matching nat->external_mac +- * on the l3dgw_port instance where nat->logical_port is +- * resident. 
*/ +- if (distributed) { +- /* Store the ethernet address of the port receiving the packet. +- * This will save us from having to match on inport further +- * down in the pipeline. +- */ +- ds_clear(&actions); +- ds_put_format(&actions, REG_INPORT_ETH_ADDR " = %s; next;", +- od->l3dgw_port->lrp_networks.ea_s); ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, ++ op->lrp_networks.ipv6_addrs[i].network_s, ++ op->lrp_networks.ipv6_addrs[i].plen, NULL, false, ++ &op->nbrp->header_); ++ } ++ } ++} + +- ds_clear(&match); +- ds_put_format(&match, +- "eth.dst == "ETH_ADDR_FMT" && inport == %s" +- " && is_chassis_resident(\"%s\")", +- ETH_ADDR_ARGS(mac), +- od->l3dgw_port->json_key, +- nat->logical_port); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); +- } ++static void ++build_static_route_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct hmap *ports) ++{ ++ if (od->nbr) { ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150, ++ REG_ECMP_GROUP_ID" == 0", "next;"); + +- /* Ingress Gateway Redirect Table: For NAT on a distributed +- * router, add flows that are specific to a NAT rule. These +- * flows indicate the presence of an applicable NAT rule that +- * can be applied in a distributed manner. +- * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to +- * NAT external IP and NAT external mac so the ARP request +- * generated in the following stage is sent out with proper IP/MAC +- * src addresses. +- */ +- if (distributed) { +- ds_clear(&match); +- ds_clear(&actions); +- ds_put_format(&match, +- "ip%s.src == %s && outport == %s && " +- "is_chassis_resident(\"%s\")", +- is_v6 ? "6" : "4", nat->logical_ip, +- od->l3dgw_port->json_key, nat->logical_port); +- ds_put_format(&actions, "eth.src = %s; %s = %s; next;", +- nat->external_mac, +- is_v6 ? 
REG_SRC_IPV6 : REG_SRC_IPV4, +- nat->external_ip); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, +- 100, ds_cstr(&match), +- ds_cstr(&actions), &nat->header_); ++ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups); ++ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes); ++ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes); ++ struct ecmp_groups_node *group; ++ for (int i = 0; i < od->nbr->n_static_routes; i++) { ++ struct parsed_route *route = ++ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]); ++ if (!route) { ++ continue; + } +- +- /* Egress Loopback table: For NAT on a distributed router. +- * If packets in the egress pipeline on the distributed +- * gateway port have ip.dst matching a NAT external IP, then +- * loop a clone of the packet back to the beginning of the +- * ingress pipeline with inport = outport. */ +- if (od->l3dgw_port) { +- /* Distributed router. */ +- ds_clear(&match); +- ds_put_format(&match, "ip%s.dst == %s && outport == %s", +- is_v6 ? 
"6" : "4", +- nat->external_ip, +- od->l3dgw_port->json_key); +- if (!distributed) { +- ds_put_format(&match, " && is_chassis_resident(%s)", +- od->l3redirect_port->json_key); +- } else { +- ds_put_format(&match, " && is_chassis_resident(\"%s\")", +- nat->logical_port); +- } +- +- ds_clear(&actions); +- ds_put_format(&actions, +- "clone { ct_clear; " +- "inport = outport; outport = \"\"; " +- "flags = 0; flags.loopback = 1; "); +- for (int j = 0; j < MFF_N_LOG_REGS; j++) { +- ds_put_format(&actions, "reg%d = 0; ", j); ++ group = ecmp_groups_find(&ecmp_groups, route); ++ if (group) { ++ ecmp_groups_add_route(group, route); ++ } else { ++ const struct parsed_route *existed_route = ++ unique_routes_remove(&unique_routes, route); ++ if (existed_route) { ++ group = ecmp_groups_add(&ecmp_groups, existed_route); ++ if (group) { ++ ecmp_groups_add_route(group, route); ++ } ++ } else { ++ unique_routes_add(&unique_routes, route); + } +- ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; " +- "next(pipeline=ingress, table=%d); };", +- ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, +- ds_cstr(&match), ds_cstr(&actions), +- &nat->header_); + } + } +- +- /* Handle force SNAT options set in the gateway router. 
*/ +- if (!od->l3dgw_port) { +- if (dnat_force_snat_ip) { +- if (od->dnat_force_snat_addrs.n_ipv4_addrs) { +- build_lrouter_force_snat_flows(lflows, od, "4", +- od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s, +- "dnat"); +- } +- if (od->dnat_force_snat_addrs.n_ipv6_addrs) { +- build_lrouter_force_snat_flows(lflows, od, "6", +- od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s, +- "dnat"); +- } +- } +- if (lb_force_snat_ip) { +- if (od->lb_force_snat_addrs.n_ipv4_addrs) { +- build_lrouter_force_snat_flows(lflows, od, "4", +- od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); +- } +- if (od->lb_force_snat_addrs.n_ipv6_addrs) { +- build_lrouter_force_snat_flows(lflows, od, "6", +- od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); +- } +- } +- +- /* For gateway router, re-circulate every packet through +- * the DNAT zone. This helps with the following. +- * +- * Any packet that needs to be unDNATed in the reverse +- * direction gets unDNATed. Ideally this could be done in +- * the egress pipeline. But since the gateway router +- * does not have any feature that depends on the source +- * ip address being external IP address for IP routing, +- * we can do it here, saving a future re-circulation. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, +- "ip", "flags.loopback = 1; ct_dnat;"); ++ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { ++ /* add a flow in IP_ROUTING, and one flow for each member in ++ * IP_ROUTING_ECMP. */ ++ build_ecmp_route_flow(lflows, od, ports, group); + } +- +- /* Load balancing and packet defrag are only valid on +- * Gateway routers or router with gateway port. 
*/ +- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { +- sset_destroy(&nat_entries); +- continue; ++ const struct unique_routes_node *ur; ++ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { ++ build_static_route_flow(lflows, od, ports, ur->route); + } ++ ecmp_groups_destroy(&ecmp_groups); ++ unique_routes_destroy(&unique_routes); ++ parsed_routes_destroy(&parsed_routes); ++ } ++} + +- /* A set to hold all ips that need defragmentation and tracking. */ +- struct sset all_ips = SSET_INITIALIZER(&all_ips); ++/* IP Multicast lookup. Here we set the output port, adjust TTL and ++ * advance to next table (priority 500). ++ */ ++static void ++build_mcast_lookup_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (od->nbr) { + +- for (int i = 0; i < od->nbr->n_load_balancer; i++) { +- struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; +- struct ovn_northd_lb *lb = +- ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); +- ovs_assert(lb); ++ /* Drop IPv6 multicast traffic that shouldn't be forwarded, ++ * i.e., router solicitation and router advertisement. ++ */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550, ++ "nd_rs || nd_ra", "drop;"); ++ if (!od->mcast_info.rtr.relay) { ++ return; ++ } + +- for (size_t j = 0; j < lb->n_vips; j++) { +- struct ovn_lb_vip *lb_vip = &lb->vips[j]; +- struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; +- ds_clear(&actions); +- build_lb_vip_actions(lb_vip, lb_vip_nb, &actions, +- lb->selection_fields, false); ++ struct ovn_igmp_group *igmp_group; + +- if (!sset_contains(&all_ips, lb_vip->vip_str)) { +- sset_add(&all_ips, lb_vip->vip_str); +- /* If there are any load balancing rules, we should send +- * the packet to conntrack for defragmentation and +- * tracking. This helps with two things. +- * +- * 1. With tracking, we can send only new connections to +- * pick a DNAT ip address from a group. +- * 2. 
If there are L4 ports in load balancing rules, we +- * need the defragmentation to match on L4 ports. */ +- ds_clear(&match); +- if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { +- ds_put_format(&match, "ip && ip4.dst == %s", +- lb_vip->vip_str); +- } else { +- ds_put_format(&match, "ip && ip6.dst == %s", +- lb_vip->vip_str); +- } +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, +- 100, ds_cstr(&match), "ct_next;", +- &nb_lb->header_); +- } ++ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { ++ ds_clear(match); ++ ds_clear(actions); ++ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { ++ ds_put_format(match, "ip4 && ip4.dst == %s ", ++ igmp_group->mcgroup.name); ++ } else { ++ ds_put_format(match, "ip6 && ip6.dst == %s ", ++ igmp_group->mcgroup.name); ++ } ++ if (od->mcast_info.rtr.flood_static) { ++ ds_put_cstr(actions, ++ "clone { " ++ "outport = \""MC_STATIC"\"; " ++ "ip.ttl--; " ++ "next; " ++ "};"); ++ } ++ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;", ++ igmp_group->mcgroup.name); ++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500, ++ ds_cstr(match), ds_cstr(actions)); ++ } + +- /* Higher priority rules are added for load-balancing in DNAT +- * table. For every match (on a VIP[:port]), we add two flows +- * via add_router_lb_flow(). One flow is for specific matching +- * on ct.new with an action of "ct_lb($targets);". The other +- * flow is for ct.est with an action of "ct_dnat;". */ +- ds_clear(&match); +- if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { +- ds_put_format(&match, "ip && ip4.dst == %s", +- lb_vip->vip_str); +- } else { +- ds_put_format(&match, "ip && ip6.dst == %s", +- lb_vip->vip_str); +- } ++ /* If needed, flood unregistered multicast on statically configured ++ * ports. Otherwise drop any multicast traffic. 
++ */ ++ if (od->mcast_info.rtr.flood_static) { ++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, ++ "ip4.mcast || ip6.mcast", ++ "clone { " ++ "outport = \""MC_STATIC"\"; " ++ "ip.ttl--; " ++ "next; " ++ "};"); ++ } else { ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, ++ "ip4.mcast || ip6.mcast", "drop;"); ++ } ++ } ++} + +- int prio = 110; +- bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); +- bool is_sctp = nullable_string_is_equal(nb_lb->protocol, +- "sctp"); +- const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp"; ++/* Logical router ingress table POLICY: Policy. ++ * ++ * A packet that arrives at this table is an IP packet that should be ++ * permitted/denied/rerouted to the address in the rule's nexthop. ++ * This table sets outport to the correct out_port, ++ * eth.src to the output port's MAC address, ++ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address ++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and ++ * advances to the next table for ARP/ND resolution. */ ++static void ++build_ingress_policy_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct hmap *ports) ++{ ++ if (od->nbr) { ++ /* This is a catch-all rule. It has the lowest priority (0) ++ * does a match-all("1") and pass-through (next) */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", ++ REG_ECMP_GROUP_ID" = 0; next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150, ++ REG_ECMP_GROUP_ID" == 0", "next;"); + +- if (lb_vip->vip_port) { +- ds_put_format(&match, " && %s && %s.dst == %d", proto, +- proto, lb_vip->vip_port); +- prio = 120; +- } ++ /* Convert routing policies to flows. 
*/ ++ uint16_t ecmp_group_id = 1; ++ for (int i = 0; i < od->nbr->n_policies; i++) { ++ const struct nbrec_logical_router_policy *rule ++ = od->nbr->policies[i]; ++ bool is_ecmp_reroute = ++ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1); + +- if (od->l3redirect_port && +- (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { +- ds_put_format(&match, " && is_chassis_resident(%s)", +- od->l3redirect_port->json_key); +- } +- add_router_lb_flow(lflows, od, &match, &actions, prio, +- lb_force_snat_ip, lb_vip, proto, +- nb_lb, meter_groups, &nat_entries); ++ if (is_ecmp_reroute) { ++ build_ecmp_routing_policy_flows(lflows, od, ports, rule, ++ ecmp_group_id); ++ ecmp_group_id++; ++ } else { ++ build_routing_policy_flow(lflows, od, ports, rule, ++ &rule->header_); + } + } +- sset_destroy(&all_ips); +- sset_destroy(&nat_entries); + } +- +- ds_destroy(&match); +- ds_destroy(&actions); + } + +-/* Logical router ingress Table 0: L2 Admission Control +- * Generic admission control flows (without inport check). +- */ ++/* Local router ingress table ARP_RESOLVE: ARP Resolution. */ + static void +-build_adm_ctrl_flows_for_lrouter( ++build_arp_resolve_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows) + { + if (od->nbr) { +- /* Logical VLANs not supported. +- * Broadcast/multicast source address is invalid. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, +- "vlan.present || eth.src[40]", "drop;"); ++ /* Multicast packets already have the outport set so just advance to ++ * next table (priority 500). 
*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500, ++ "ip4.mcast || ip6.mcast", "next;"); ++ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", ++ "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;"); ++ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", ++ "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;"); + } + } + +-/* Logical router ingress Table 0: L2 Admission Control +- * This table drops packets that the router shouldn’t see at all based +- * on their Ethernet headers. +- */ +-static void +-build_adm_ctrl_flows_for_lrouter_port( ++/* Local router ingress table ARP_RESOLVE: ARP Resolution. ++ * ++ * Any unicast packet that reaches this table is an IP packet whose ++ * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 ++ * (ip4.dst/ipv6.dst is the final destination). ++ * This table resolves the IP address in ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and ++ * an Ethernet address in eth.dst. ++ */ ++static void ++build_arp_resolve_flows_for_lrouter_port( + struct ovn_port *op, struct hmap *lflows, ++ struct hmap *ports, + struct ds *match, struct ds *actions) + { +- if (op->nbrp) { +- if (!lrport_is_enabled(op->nbrp)) { +- /* Drop packets from disabled logical ports (since logical flow +- * tables are default-drop). */ +- return; +- } ++ if (op->nbsp && !lsp_is_enabled(op->nbsp)) { ++ return; ++ } + +- if (op->derived) { +- /* No ingress packets should be received on a chassisredirect +- * port. */ +- return; +- } ++ if (op->nbrp) { ++ /* This is a logical router port. If next-hop IP address in ++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this ++ * router port, then the packet is intended to eventually be sent ++ * to this logical port. Set the destination mac address using ++ * this port's mac address. ++ * ++ * The packet is still in peer's logical pipeline. So the match ++ * should be on peer's outport. 
*/ ++ if (op->peer && op->nbrp->peer) { ++ if (op->lrp_networks.n_ipv4_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 "== ", ++ op->peer->json_key); ++ op_put_v4_networks(match, op, false); + +- /* Store the ethernet address of the port receiving the packet. +- * This will save us from having to match on inport further down in +- * the pipeline. +- */ +- ds_clear(actions); +- ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", +- op->lrp_networks.ea_s); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ op->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, op->peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } + +- ds_clear(match); +- ds_put_format(match, "eth.mcast && inport == %s", op->json_key); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); ++ if (op->lrp_networks.n_ipv6_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV6 " == ", ++ op->peer->json_key); ++ op_put_v6_networks(match, op); + +- ds_clear(match); +- ds_put_format(match, "eth.dst == %s && inport == %s", +- op->lrp_networks.ea_s, op->json_key); +- if (op->od->l3dgw_port && op == op->od->l3dgw_port +- && op->od->l3redirect_port) { +- /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s +- * should only be received on the gateway chassis. 
*/ +- ds_put_format(match, " && is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ op->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, op->peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } + } +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); +- } +-} + ++ if (!op->derived && op->od->l3redirect_port) { ++ const char *redirect_type = smap_get(&op->nbrp->options, ++ "redirect-type"); ++ if (redirect_type && !strcasecmp(redirect_type, "bridged")) { ++ /* Packet is on a non gateway chassis and ++ * has an unresolved ARP on a network behind gateway ++ * chassis attached router port. Since, redirect type ++ * is "bridged", instead of calling "get_arp" ++ * on this node, we will redirect the packet to gateway ++ * chassis, by setting destination mac router port mac.*/ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ "!is_chassis_resident(%s)", op->json_key, ++ op->od->l3redirect_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ op->lrp_networks.ea_s); + +-/* Logical router ingress Table 1 and 2: Neighbor lookup and learning +- * lflows for logical routers. */ +-static void +-build_neigh_learning_flows_for_lrouter( +- struct ovn_datapath *od, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (od->nbr) { ++ ovn_lflow_add_with_hint(lflows, op->od, ++ S_ROUTER_IN_ARP_RESOLVE, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++ } + +- /* Learn MAC bindings from ARP/IPv6 ND. +- * +- * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the +- * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp' +- * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit. 
+- * If "always_learn_from_arp_request" is set to false, it will also +- * lookup for the (arp.spa) in the mac binding table using the +- * "lookup_arp_ip" action for ARP request packets, and stores the +- * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit +- * to "1" directly for ARP response packets. +- * +- * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup +- * for the (nd.target, nd.tll) in the mac binding table using the +- * 'lookup_nd' action and stores the result in +- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If +- * "always_learn_from_arp_request" is set to false, +- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set. +- * +- * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup +- * for the (ip6.src, nd.sll) in the mac binding table using the +- * 'lookup_nd' action and stores the result in +- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If +- * "always_learn_from_arp_request" is set to false, it will also lookup +- * for the (ip6.src) in the mac binding table using the "lookup_nd_ip" +- * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT +- * bit. +- * +- * Table LEARN_NEIGHBOR learns the mac-binding using the action +- * - 'put_arp/put_nd'. Learning mac-binding is skipped if +- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or +- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set. ++ /* Drop IP traffic destined to router owned IPs. Part of it is dropped ++ * in stage "lr_in_ip_input" but traffic that could have been unSNATed ++ * but didn't match any existing session might still end up here. + * +- * */ +- +- /* Flows for LOOKUP_NEIGHBOR. */ +- bool learn_from_arp_request = smap_get_bool(&od->nbr->options, +- "always_learn_from_arp_request", true); +- ds_clear(actions); +- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT +- " = lookup_arp(inport, arp.spa, arp.sha); %snext;", +- learn_from_arp_request ? 
"" : +- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, +- "arp.op == 2", ds_cstr(actions)); ++ * Priority 1. ++ */ ++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true, ++ lflows); ++ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) ++ && strcmp(op->nbsp->type, "virtual")) { ++ /* This is a logical switch port that backs a VM or a container. ++ * Extract its addresses. For each of the address, go through all ++ * the router ports attached to the switch (to which this port ++ * connects) and if the address in question is reachable from the ++ * router port, add an ARP/ND entry in that router's pipeline. */ + +- ds_clear(actions); +- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT +- " = lookup_nd(inport, nd.target, nd.tll); %snext;", +- learn_from_arp_request ? "" : +- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na", +- ds_cstr(actions)); ++ for (size_t i = 0; i < op->n_lsp_addrs; i++) { ++ const char *ea_s = op->lsp_addrs[i].ea_s; ++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { ++ const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; ++ for (size_t k = 0; k < op->od->n_router_ports; k++) { ++ /* Get the Logical_Router_Port that the ++ * Logical_Switch_Port is connected to, as ++ * 'peer'. */ ++ const char *peer_name = smap_get( ++ &op->od->router_ports[k]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } + +- ds_clear(actions); +- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT +- " = lookup_nd(inport, ip6.src, nd.sll); %snext;", +- learn_from_arp_request ? 
"" : +- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT +- " = lookup_nd_ip(inport, ip6.src); "); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns", +- ds_cstr(actions)); ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } + +- /* For other packet types, we can skip neighbor learning. +- * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1", +- REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;"); ++ if (!find_lrp_member_ip(peer, ip_s)) { ++ continue; ++ } + +- /* Flows for LEARN_NEIGHBOR. */ +- /* Skip Neighbor learning if not required. */ +- ds_clear(match); +- ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s", +- learn_from_arp_request ? "" : +- " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100, +- ds_cstr(match), "next;"); ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == %s", ++ peer->json_key, ip_s); + +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, +- "arp", "put_arp(inport, arp.spa, arp.sha); next;"); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ } ++ } + +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, +- "nd_na", "put_nd(inport, nd.target, nd.tll); next;"); ++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { ++ const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; ++ for (size_t k = 0; k < op->od->n_router_ports; k++) { ++ /* Get the Logical_Router_Port that the ++ * Logical_Switch_Port is connected to, as ++ * 'peer'. 
*/ ++ const char *peer_name = smap_get( ++ &op->od->router_ports[k]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } + +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, +- "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;"); +- } +- +-} ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } + +-/* Logical router ingress Table 1: Neighbor lookup lflows +- * for logical router ports. */ +-static void +-build_neigh_learning_flows_for_lrouter_port( +- struct ovn_port *op, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (op->nbrp) { ++ if (!find_lrp_member_ip(peer, ip_s)) { ++ continue; ++ } + +- bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options, +- "always_learn_from_arp_request", true); ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV6 " == %s", ++ peer->json_key, ip_s); + +- /* Check if we need to learn mac-binding from ARP requests. */ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- if (!learn_from_arp_request) { +- /* ARP request to this address should always get learned, +- * so add a priority-110 flow to set +- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. 
*/ +- ds_clear(match); +- ds_put_format(match, +- "inport == %s && arp.spa == %s/%u && " +- "arp.tpa == %s && arp.op == 1", +- op->json_key, +- op->lrp_networks.ipv4_addrs[i].network_s, +- op->lrp_networks.ipv4_addrs[i].plen, +- op->lrp_networks.ipv4_addrs[i].addr_s); +- if (op->od->l3dgw_port && op == op->od->l3dgw_port +- && op->od->l3redirect_port) { +- ds_put_format(match, " && is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); + } +- const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT +- " = lookup_arp(inport, arp.spa, arp.sha); " +- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;" +- " next;"; +- ovn_lflow_add_with_hint(lflows, op->od, +- S_ROUTER_IN_LOOKUP_NEIGHBOR, 110, +- ds_cstr(match), actions_s, +- &op->nbrp->header_); +- } +- ds_clear(match); +- ds_put_format(match, +- "inport == %s && arp.spa == %s/%u && arp.op == 1", +- op->json_key, +- op->lrp_networks.ipv4_addrs[i].network_s, +- op->lrp_networks.ipv4_addrs[i].plen); +- if (op->od->l3dgw_port && op == op->od->l3dgw_port +- && op->od->l3redirect_port) { +- ds_put_format(match, " && is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); + } +- ds_clear(actions); +- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT +- " = lookup_arp(inport, arp.spa, arp.sha); %snext;", +- learn_from_arp_request ? "" : +- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT +- " = lookup_arp_ip(inport, arp.spa); "); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); + } +- } +-} +- +-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router +- * Adv (RA) options and response. 
*/ +-static void +-build_ND_RA_flows_for_lrouter_port( +- struct ovn_port *op, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (!op->nbrp || op->nbrp->peer || !op->peer) { +- return; +- } ++ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) ++ && !strcmp(op->nbsp->type, "virtual")) { ++ /* This is a virtual port. Add ARP replies for the virtual ip with ++ * the mac of the present active virtual parent. ++ * If the logical port doesn't have virtual parent set in ++ * Port_Binding table, then add the flow to set eth.dst to ++ * 00:00:00:00:00:00 and advance to next table so that ARP is ++ * resolved by router pipeline using the arp{} action. ++ * The MAC_Binding entry for the virtual ip might be invalid. */ ++ ovs_be32 ip; + +- if (!op->lrp_networks.n_ipv6_addrs) { +- return; +- } ++ const char *vip = smap_get(&op->nbsp->options, ++ "virtual-ip"); ++ const char *virtual_parents = smap_get(&op->nbsp->options, ++ "virtual-parents"); ++ if (!vip || !virtual_parents || ++ !ip_parse(vip, &ip) || !op->sb) { ++ return; ++ } + +- struct smap options; +- smap_clone(&options, &op->sb->options); ++ if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] || ++ !op->sb->chassis) { ++ /* The virtual port is not claimed yet. */ ++ for (size_t i = 0; i < op->od->n_router_ports; i++) { ++ const char *peer_name = smap_get( ++ &op->od->router_ports[i]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } + +- /* enable IPv6 prefix delegation */ +- bool prefix_delegation = smap_get_bool(&op->nbrp->options, +- "prefix_delegation", false); +- if (!lrport_is_enabled(op->nbrp)) { +- prefix_delegation = false; +- } +- smap_add(&options, "ipv6_prefix_delegation", +- prefix_delegation ? 
"true" : "false"); ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } + +- bool ipv6_prefix = smap_get_bool(&op->nbrp->options, +- "prefix", false); +- if (!lrport_is_enabled(op->nbrp)) { +- ipv6_prefix = false; +- } +- smap_add(&options, "ipv6_prefix", +- ipv6_prefix ? "true" : "false"); +- sbrec_port_binding_set_options(op->sb, &options); ++ if (find_lrp_member_ip(peer, vip)) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == %s", ++ peer->json_key, vip); + +- smap_destroy(&options); ++ const char *arp_actions = ++ "eth.dst = 00:00:00:00:00:00; next;"; ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ arp_actions, ++ &op->nbsp->header_); ++ break; ++ } ++ } ++ } else { ++ struct ovn_port *vp = ++ ovn_port_find(ports, op->sb->virtual_parent); ++ if (!vp || !vp->nbsp) { ++ return; ++ } + +- const char *address_mode = smap_get( +- &op->nbrp->ipv6_ra_configs, "address_mode"); ++ for (size_t i = 0; i < vp->n_lsp_addrs; i++) { ++ bool found_vip_network = false; ++ const char *ea_s = vp->lsp_addrs[i].ea_s; ++ for (size_t j = 0; j < vp->od->n_router_ports; j++) { ++ /* Get the Logical_Router_Port that the ++ * Logical_Switch_Port is connected to, as ++ * 'peer'. 
*/ ++ const char *peer_name = smap_get( ++ &vp->od->router_ports[j]->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ continue; ++ } + +- if (!address_mode) { +- return; +- } +- if (strcmp(address_mode, "slaac") && +- strcmp(address_mode, "dhcpv6_stateful") && +- strcmp(address_mode, "dhcpv6_stateless")) { +- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); +- VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined", +- address_mode); +- return; +- } ++ struct ovn_port *peer = ++ ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ continue; ++ } + +- if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic", +- false)) { +- copy_ra_to_sb(op, address_mode); +- } ++ if (!find_lrp_member_ip(peer, vip)) { ++ continue; ++ } + +- ds_clear(match); +- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs", +- op->json_key); +- ds_clear(actions); ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == %s", ++ peer->json_key, vip); + +- const char *mtu_s = smap_get( +- &op->nbrp->ipv6_ra_configs, "mtu"); ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ++ ds_cstr(actions), ++ &op->nbsp->header_); ++ found_vip_network = true; ++ break; ++ } + +- /* As per RFC 2460, 1280 is minimum IPv6 MTU. */ +- uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0; ++ if (found_vip_network) { ++ break; ++ } ++ } ++ } ++ } else if (lsp_is_router(op->nbsp)) { ++ /* This is a logical switch port that connects to a router. 
*/ + +- ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts(" +- "addr_mode = \"%s\", slla = %s", +- address_mode, op->lrp_networks.ea_s); +- if (mtu > 0) { +- ds_put_format(actions, ", mtu = %u", mtu); +- } ++ /* The peer of this switch port is the router port for which ++ * we need to add logical flows such that it can resolve ++ * ARP entries for all the other router ports connected to ++ * the switch in question. */ + +- const char *prf = smap_get_def( +- &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM"); +- if (strcmp(prf, "MEDIUM")) { +- ds_put_format(actions, ", router_preference = \"%s\"", prf); +- } ++ const char *peer_name = smap_get(&op->nbsp->options, ++ "router-port"); ++ if (!peer_name) { ++ return; ++ } + +- bool add_rs_response_flow = false; ++ struct ovn_port *peer = ovn_port_find(ports, peer_name); ++ if (!peer || !peer->nbrp) { ++ return; ++ } + +- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { +- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { +- continue; ++ if (peer->od->nbr && ++ smap_get_bool(&peer->od->nbr->options, ++ "dynamic_neigh_routers", false)) { ++ return; + } + +- ds_put_format(actions, ", prefix = %s/%u", +- op->lrp_networks.ipv6_addrs[i].network_s, +- op->lrp_networks.ipv6_addrs[i].plen); ++ for (size_t i = 0; i < op->od->n_router_ports; i++) { ++ const char *router_port_name = smap_get( ++ &op->od->router_ports[i]->nbsp->options, ++ "router-port"); ++ struct ovn_port *router_port = ovn_port_find(ports, ++ router_port_name); ++ if (!router_port || !router_port->nbrp) { ++ continue; ++ } + +- add_rs_response_flow = true; +- } ++ /* Skip the router port under consideration. 
*/ ++ if (router_port == peer) { ++ continue; ++ } + +- if (add_rs_response_flow) { +- ds_put_cstr(actions, "); next;"); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, +- 50, ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); +- ds_clear(actions); +- ds_clear(match); +- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && " +- "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key); ++ if (router_port->lrp_networks.n_ipv4_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV4 " == ", ++ peer->json_key); ++ op_put_v4_networks(match, router_port, false); + +- char ip6_str[INET6_ADDRSTRLEN + 1]; +- struct in6_addr lla; +- in6_generate_lla(op->lrp_networks.ea, &lla); +- memset(ip6_str, 0, sizeof(ip6_str)); +- ipv6_string_mapped(ip6_str, &lla); +- ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; " +- "ip6.dst = ip6.src; ip6.src = %s; " +- "outport = inport; flags.loopback = 1; " +- "output;", +- op->lrp_networks.ea_s, ip6_str); +- ovn_lflow_add_with_hint(lflows, op->od, +- S_ROUTER_IN_ND_RA_RESPONSE, 50, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); +- } +-} ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ router_port->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbsp->header_); ++ } + +-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS +- * responder, by default goto next. (priority 0). 
*/ +-static void +-build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows) +-{ +- if (od->nbr) { +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); ++ if (router_port->lrp_networks.n_ipv6_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "outport == %s && " ++ REG_NEXT_HOP_IPV6 " == ", ++ peer->json_key); ++ op_put_v6_networks(match, router_port); ++ ++ ds_clear(actions); ++ ds_put_format(actions, "eth.dst = %s; next;", ++ router_port->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, peer->od, ++ S_ROUTER_IN_ARP_RESOLVE, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbsp->header_); ++ } ++ } + } ++ + } + +-/* Logical router ingress table IP_ROUTING : IP Routing. ++/* Local router ingress table CHK_PKT_LEN: Check packet length. + * +- * A packet that arrives at this table is an IP packet that should be +- * routed to the address in 'ip[46].dst'. ++ * Any IPv4 packet with outport set to the distributed gateway ++ * router port, check the packet length and store the result in the ++ * 'REGBIT_PKT_LARGER' register bit. + * +- * For regular routes without ECMP, table IP_ROUTING sets outport to the +- * correct output port, eth.src to the output port's MAC address, and +- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address +- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and +- * advances to the next table. ++ * Local router ingress table LARGER_PKTS: Handle larger packets. + * +- * For ECMP routes, i.e. multiple routes with same policy and prefix, table +- * IP_ROUTING remembers ECMP group id and selects a member id, and advances +- * to table IP_ROUTING_ECMP, which sets outport, eth.src and +- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member. 
+- */ ++ * Any IPv4 packet with outport set to the distributed gateway ++ * router port and the 'REGBIT_PKT_LARGER' register bit is set, ++ * generate ICMPv4 packet with type 3 (Destination Unreachable) and ++ * code 4 (Fragmentation needed). ++ * */ + static void +-build_ip_routing_flows_for_lrouter_port( +- struct ovn_port *op, struct hmap *lflows) ++build_check_pkt_len_flows_for_lrouter( ++ struct ovn_datapath *od, struct hmap *lflows, ++ struct hmap *ports, ++ struct ds *match, struct ds *actions) + { +- if (op->nbrp) { ++ if (od->nbr) { + +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, +- op->lrp_networks.ipv4_addrs[i].network_s, +- op->lrp_networks.ipv4_addrs[i].plen, NULL, false, +- &op->nbrp->header_); +- } ++ /* Packets are allowed by default. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1", ++ "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1", ++ "next;"); + +- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { +- add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, +- op->lrp_networks.ipv6_addrs[i].network_s, +- op->lrp_networks.ipv6_addrs[i].plen, NULL, false, +- &op->nbrp->header_); ++ if (od->l3dgw_port && od->l3redirect_port) { ++ int gw_mtu = 0; ++ if (od->l3dgw_port->nbrp) { ++ gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options, ++ "gateway_mtu", 0); ++ } ++ /* Add the flows only if gateway_mtu is configured. 
*/ ++ if (gw_mtu <= 0) { ++ return; ++ } ++ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s", od->l3dgw_port->json_key); ++ ++ ds_clear(actions); ++ ds_put_format(actions, ++ REGBIT_PKT_LARGER" = check_pkt_larger(%d);" ++ " next;", gw_mtu + VLAN_ETH_HEADER_LEN); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &od->l3dgw_port->nbrp->header_); ++ ++ for (size_t i = 0; i < od->nbr->n_ports; i++) { ++ struct ovn_port *rp = ovn_port_find(ports, ++ od->nbr->ports[i]->name); ++ if (!rp || rp == od->l3dgw_port) { ++ continue; ++ } ++ ++ if (rp->lrp_networks.ipv4_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && outport == %s" ++ " && ip4 && "REGBIT_PKT_LARGER, ++ rp->json_key, od->l3dgw_port->json_key); ++ ++ ds_clear(actions); ++ /* Set icmp4.frag_mtu to gw_mtu */ ++ ds_put_format(actions, ++ "icmp4_error {" ++ REGBIT_EGRESS_LOOPBACK" = 1; " ++ "eth.dst = %s; " ++ "ip4.dst = ip4.src; " ++ "ip4.src = %s; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; /* Destination Unreachable. */ " ++ "icmp4.code = 4; /* Frag Needed and DF was Set. */ " ++ "icmp4.frag_mtu = %d; " ++ "next(pipeline=ingress, table=%d); };", ++ rp->lrp_networks.ea_s, ++ rp->lrp_networks.ipv4_addrs[0].addr_s, ++ gw_mtu, ++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_LARGER_PKTS, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &rp->nbrp->header_); ++ } ++ ++ if (rp->lrp_networks.ipv6_addrs) { ++ ds_clear(match); ++ ds_put_format(match, "inport == %s && outport == %s" ++ " && ip6 && "REGBIT_PKT_LARGER, ++ rp->json_key, od->l3dgw_port->json_key); ++ ++ ds_clear(actions); ++ /* Set icmp6.frag_mtu to gw_mtu */ ++ ds_put_format(actions, ++ "icmp6_error {" ++ REGBIT_EGRESS_LOOPBACK" = 1; " ++ "eth.dst = %s; " ++ "ip6.dst = ip6.src; " ++ "ip6.src = %s; " ++ "ip.ttl = 255; " ++ "icmp6.type = 2; /* Packet Too Big. 
*/ " ++ "icmp6.code = 0; " ++ "icmp6.frag_mtu = %d; " ++ "next(pipeline=ingress, table=%d); };", ++ rp->lrp_networks.ea_s, ++ rp->lrp_networks.ipv6_addrs[0].addr_s, ++ gw_mtu, ++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_LARGER_PKTS, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &rp->nbrp->header_); ++ } ++ } + } + } + } + ++/* Logical router ingress table GW_REDIRECT: Gateway redirect. ++ * ++ * For traffic with outport equal to the l3dgw_port ++ * on a distributed router, this table redirects a subset ++ * of the traffic to the l3redirect_port which represents ++ * the central instance of the l3dgw_port. ++ */ + static void +-build_static_route_flows_for_lrouter( ++build_gateway_redirect_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows, +- struct hmap *ports) ++ struct ds *match, struct ds *actions) + { + if (od->nbr) { +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150, +- REG_ECMP_GROUP_ID" == 0", "next;"); ++ if (od->l3dgw_port && od->l3redirect_port) { ++ const struct ovsdb_idl_row *stage_hint = NULL; + +- struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups); +- struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes); +- struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes); +- struct ecmp_groups_node *group; +- for (int i = 0; i < od->nbr->n_static_routes; i++) { +- struct parsed_route *route = +- parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]); +- if (!route) { +- continue; ++ if (od->l3dgw_port->nbrp) { ++ stage_hint = &od->l3dgw_port->nbrp->header_; + } +- group = ecmp_groups_find(&ecmp_groups, route); +- if (group) { +- ecmp_groups_add_route(group, route); +- } else { +- const struct parsed_route *existed_route = +- unique_routes_remove(&unique_routes, route); +- if (existed_route) { +- group = ecmp_groups_add(&ecmp_groups, existed_route); +- if (group) { +- ecmp_groups_add_route(group, route); +- } +- } else { +- 
unique_routes_add(&unique_routes, route); +- } +- } +- } +- HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { +- /* add a flow in IP_ROUTING, and one flow for each member in +- * IP_ROUTING_ECMP. */ +- build_ecmp_route_flow(lflows, od, ports, group); +- } +- const struct unique_routes_node *ur; +- HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { +- build_static_route_flow(lflows, od, ports, ur->route); ++ ++ /* For traffic with outport == l3dgw_port, if the ++ * packet did not match any higher priority redirect ++ * rule, then the traffic is redirected to the central ++ * instance of the l3dgw_port. */ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s", ++ od->l3dgw_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, "outport = %s; next;", ++ od->l3redirect_port->json_key); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, ++ ds_cstr(match), ds_cstr(actions), ++ stage_hint); + } +- ecmp_groups_destroy(&ecmp_groups); +- unique_routes_destroy(&unique_routes); +- parsed_routes_destroy(&parsed_routes); ++ ++ /* Packets are allowed by default. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); + } + } + +-/* IP Multicast lookup. Here we set the output port, adjust TTL and +- * advance to next table (priority 500). +- */ ++/* Local router ingress table ARP_REQUEST: ARP request. ++ * ++ * In the common case where the Ethernet destination has been resolved, ++ * this table outputs the packet (priority 0). Otherwise, it composes ++ * and sends an ARP/IPv6 NA request (priority 100). 
*/ + static void +-build_mcast_lookup_flows_for_lrouter( ++build_arp_request_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows, + struct ds *match, struct ds *actions) + { + if (od->nbr) { ++ for (int i = 0; i < od->nbr->n_static_routes; i++) { ++ const struct nbrec_logical_router_static_route *route; + +- /* Drop IPv6 multicast traffic that shouldn't be forwarded, +- * i.e., router solicitation and router advertisement. +- */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550, +- "nd_rs || nd_ra", "drop;"); +- if (!od->mcast_info.rtr.relay) { +- return; +- } +- +- struct ovn_igmp_group *igmp_group; ++ route = od->nbr->static_routes[i]; ++ struct in6_addr gw_ip6; ++ unsigned int plen; ++ char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen); ++ if (error || plen != 128) { ++ free(error); ++ continue; ++ } + +- LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) { + ds_clear(match); ++ ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && " ++ "ip6 && " REG_NEXT_HOP_IPV6 " == %s", ++ route->nexthop); ++ struct in6_addr sn_addr; ++ struct eth_addr eth_dst; ++ in6_addr_solicited_node(&sn_addr, &gw_ip6); ++ ipv6_multicast_to_ethernet(ð_dst, &sn_addr); ++ ++ char sn_addr_s[INET6_ADDRSTRLEN + 1]; ++ ipv6_string_mapped(sn_addr_s, &sn_addr); ++ + ds_clear(actions); +- if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) { +- ds_put_format(match, "ip4 && ip4.dst == %s ", +- igmp_group->mcgroup.name); +- } else { +- ds_put_format(match, "ip6 && ip6.dst == %s ", +- igmp_group->mcgroup.name); +- } +- if (od->mcast_info.rtr.flood_static) { +- ds_put_cstr(actions, +- "clone { " +- "outport = \""MC_STATIC"\"; " +- "ip.ttl--; " +- "next; " +- "};"); +- } +- ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;", +- igmp_group->mcgroup.name); +- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500, +- ds_cstr(match), ds_cstr(actions)); +- } ++ ds_put_format(actions, ++ "nd_ns { " ++ "eth.dst = "ETH_ADDR_FMT"; " ++ "ip6.dst = %s; " ++ 
"nd.target = %s; " ++ "output; " ++ "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s, ++ route->nexthop); + +- /* If needed, flood unregistered multicast on statically configured +- * ports. Otherwise drop any multicast traffic. +- */ +- if (od->mcast_info.rtr.flood_static) { +- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, +- "ip4.mcast || ip6.mcast", +- "clone { " +- "outport = \""MC_STATIC"\"; " +- "ip.ttl--; " +- "next; " +- "};"); +- } else { +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, +- "ip4.mcast || ip6.mcast", "drop;"); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200, ++ ds_cstr(match), ds_cstr(actions), ++ &route->header_); + } ++ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, ++ "eth.dst == 00:00:00:00:00:00 && ip4", ++ "arp { " ++ "eth.dst = ff:ff:ff:ff:ff:ff; " ++ "arp.spa = " REG_SRC_IPV4 "; " ++ "arp.tpa = " REG_NEXT_HOP_IPV4 "; " ++ "arp.op = 1; " /* ARP request */ ++ "output; " ++ "};"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, ++ "eth.dst == 00:00:00:00:00:00 && ip6", ++ "nd_ns { " ++ "nd.target = " REG_NEXT_HOP_IPV6 "; " ++ "output; " ++ "};"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); + } + } + +-/* Logical router ingress table POLICY: Policy. ++/* Logical router egress table DELIVERY: Delivery (priority 100-110). + * +- * A packet that arrives at this table is an IP packet that should be +- * permitted/denied/rerouted to the address in the rule's nexthop. +- * This table sets outport to the correct out_port, +- * eth.src to the output port's MAC address, +- * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address +- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and +- * advances to the next table for ARP/ND resolution. */ ++ * Priority 100 rules deliver packets to enabled logical ports. ++ * Priority 110 rules match multicast packets and update the source ++ * mac before delivering to enabled logical ports. 
IP multicast traffic ++ * bypasses S_ROUTER_IN_IP_ROUTING route lookups. ++ */ + static void +-build_ingress_policy_flows_for_lrouter( +- struct ovn_datapath *od, struct hmap *lflows, +- struct hmap *ports) ++build_egress_delivery_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) + { +- if (od->nbr) { +- /* This is a catch-all rule. It has the lowest priority (0) +- * does a match-all("1") and pass-through (next) */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", +- REG_ECMP_GROUP_ID" = 0; next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150, +- REG_ECMP_GROUP_ID" == 0", "next;"); ++ if (op->nbrp) { ++ if (!lrport_is_enabled(op->nbrp)) { ++ /* Drop packets to disabled logical ports (since logical flow ++ * tables are default-drop). */ ++ return; ++ } + +- /* Convert routing policies to flows. */ +- uint16_t ecmp_group_id = 1; +- for (int i = 0; i < od->nbr->n_policies; i++) { +- const struct nbrec_logical_router_policy *rule +- = od->nbr->policies[i]; +- bool is_ecmp_reroute = +- (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1); ++ if (op->derived) { ++ /* No egress packets should be processed in the context of ++ * a chassisredirect port. The chassisredirect port should ++ * be replaced by the l3dgw port in the local output ++ * pipeline stage before egress processing. */ ++ return; ++ } + +- if (is_ecmp_reroute) { +- build_ecmp_routing_policy_flows(lflows, od, ports, rule, +- ecmp_group_id); +- ecmp_group_id++; +- } else { +- build_routing_policy_flow(lflows, od, ports, rule, +- &rule->header_); +- } ++ /* If multicast relay is enabled then also adjust source mac for IP ++ * multicast traffic. 
++ */ ++ if (op->od->mcast_info.rtr.relay) { ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s", ++ op->json_key); ++ ds_put_format(actions, "eth.src = %s; output;", ++ op->lrp_networks.ea_s); ++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110, ++ ds_cstr(match), ds_cstr(actions)); + } ++ ++ ds_clear(match); ++ ds_put_format(match, "outport == %s", op->json_key); ++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, ++ ds_cstr(match), "output;"); + } ++ + } + +-/* Local router ingress table ARP_RESOLVE: ARP Resolution. */ + static void +-build_arp_resolve_flows_for_lrouter( ++build_misc_local_traffic_drop_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows) + { + if (od->nbr) { +- /* Multicast packets already have the outport set so just advance to +- * next table (priority 500). */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500, +- "ip4.mcast || ip6.mcast", "next;"); ++ /* L3 admission control: drop multicast and broadcast source, localhost ++ * source or destination, and zero network source or destination ++ * (priority 100). */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, ++ "ip4.src_mcast ||" ++ "ip4.src == 255.255.255.255 || " ++ "ip4.src == 127.0.0.0/8 || " ++ "ip4.dst == 127.0.0.0/8 || " ++ "ip4.src == 0.0.0.0/8 || " ++ "ip4.dst == 0.0.0.0/8", ++ "drop;"); + +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", +- "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;"); ++ /* Drop ARP packets (priority 85). ARP request packets for router's own ++ * IPs are handled with priority-90 flows. ++ * Drop IPv6 ND packets (priority 85). ND NA packets for router's own ++ * IPs are handled with priority-90 flows. 
++ */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, ++ "arp || nd", "drop;"); + +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", +- "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;"); +- } +-} ++ /* Allow IPv6 multicast traffic that's supposed to reach the ++ * router pipeline (e.g., router solicitations). ++ */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra", ++ "next;"); ++ ++ /* Drop other reserved multicast. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83, ++ "ip6.mcast_rsvd", "drop;"); ++ ++ /* Allow other multicast if relay enabled (priority 82). */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82, ++ "ip4.mcast || ip6.mcast", ++ od->mcast_info.rtr.relay ? "next;" : "drop;"); ++ ++ /* Drop Ethernet local broadcast. By definition this traffic should ++ * not be forwarded.*/ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, ++ "eth.bcast", "drop;"); ++ ++ /* TTL discard */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, ++ "ip4 && ip.ttl == {0, 1}", "drop;"); ++ ++ /* Pass other traffic not already handled to the next table for ++ * routing. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); ++ } ++} + +-/* Local router ingress table ARP_RESOLVE: ARP Resolution. +- * +- * Any unicast packet that reaches this table is an IP packet whose +- * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 +- * (ip4.dst/ipv6.dst is the final destination). +- * This table resolves the IP address in +- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and +- * an Ethernet address in eth.dst. 
+- */ + static void +-build_arp_resolve_flows_for_lrouter_port( ++build_dhcpv6_reply_flows_for_lrouter_port( + struct ovn_port *op, struct hmap *lflows, +- struct hmap *ports, +- struct ds *match, struct ds *actions) ++ struct ds *match) + { +- if (op->nbsp && !lsp_is_enabled(op->nbsp)) { +- return; ++ if (op->nbrp && (!op->derived)) { ++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&" ++ " udp.dst == 546", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ++ ds_cstr(match), ++ "reg0 = 0; handle_dhcpv6_reply;"); ++ } + } + +- if (op->nbrp) { +- /* This is a logical router port. If next-hop IP address in +- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this +- * router port, then the packet is intended to eventually be sent +- * to this logical port. Set the destination mac address using +- * this port's mac address. +- * +- * The packet is still in peer's logical pipeline. So the match +- * should be on peer's outport. */ +- if (op->peer && op->nbrp->peer) { +- if (op->lrp_networks.n_ipv4_addrs) { +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV4 "== ", +- op->peer->json_key); +- op_put_v4_networks(match, op, false); ++} + +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", +- op->lrp_networks.ea_s); +- ovn_lflow_add_with_hint(lflows, op->peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); +- } ++static void ++build_ipv6_input_flows_for_lrouter_port( ++ struct ovn_port *op, struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ if (op->nbrp && (!op->derived)) { ++ /* No ingress packets are accepted on a chassisredirect ++ * port, so no need to program flows for that port. */ ++ if (op->lrp_networks.n_ipv6_addrs) { ++ /* ICMPv6 echo reply. 
These flows reply to echo requests ++ * received for the router's IP address. */ ++ ds_clear(match); ++ ds_put_cstr(match, "ip6.dst == "); ++ op_put_v6_networks(match, op); ++ ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0"); + +- if (op->lrp_networks.n_ipv6_addrs) { +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV6 " == ", +- op->peer->json_key); +- op_put_v6_networks(match, op); ++ const char *lrp_actions = ++ "ip6.dst <-> ip6.src; " ++ "ip.ttl = 255; " ++ "icmp6.type = 129; " ++ "flags.loopback = 1; " ++ "next; "; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, ++ ds_cstr(match), lrp_actions, ++ &op->nbrp->header_); ++ } + +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", +- op->lrp_networks.ea_s); +- ovn_lflow_add_with_hint(lflows, op->peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); ++ /* ND reply. These flows reply to ND solicitations for the ++ * router's own IP address. */ ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ ds_clear(match); ++ if (op->od->l3dgw_port && op == op->od->l3dgw_port ++ && op->od->l3redirect_port) { ++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s ++ * should only be sent from the gateway chassi, so that ++ * upstream MAC learning points to the gateway chassis. ++ * Also need to avoid generation of multiple ND replies ++ * from different chassis. 
*/ ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } ++ ++ build_lrouter_nd_flow(op->od, op, "nd_na_router", ++ op->lrp_networks.ipv6_addrs[i].addr_s, ++ op->lrp_networks.ipv6_addrs[i].sn_addr_s, ++ REG_INPORT_ETH_ADDR, match, false, 90, ++ &op->nbrp->header_, lflows); + } + +- if (!op->derived && op->od->l3redirect_port) { +- const char *redirect_type = smap_get(&op->nbrp->options, +- "redirect-type"); +- if (redirect_type && !strcasecmp(redirect_type, "bridged")) { +- /* Packet is on a non gateway chassis and +- * has an unresolved ARP on a network behind gateway +- * chassis attached router port. Since, redirect type +- * is "bridged", instead of calling "get_arp" +- * on this node, we will redirect the packet to gateway +- * chassis, by setting destination mac router port mac.*/ ++ /* UDP/TCP port unreachable */ ++ if (!smap_get(&op->od->nbr->options, "chassis") ++ && !op->od->l3dgw_port) { ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { + ds_clear(match); +- ds_put_format(match, "outport == %s && " +- "!is_chassis_resident(%s)", op->json_key, +- op->od->l3redirect_port->json_key); +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", +- op->lrp_networks.ea_s); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag && tcp", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ const char *action = "tcp_reset {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); + +- ovn_lflow_add_with_hint(lflows, op->od, +- S_ROUTER_IN_ARP_RESOLVE, 50, +- ds_cstr(match), ds_cstr(actions), ++ ds_clear(match); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag && udp", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ action = "icmp6 {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "ip.ttl = 255; " ++ "icmp6.type = 1; " ++ "icmp6.code = 4; " ++ 
"next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); ++ ++ ds_clear(match); ++ ds_put_format(match, ++ "ip6 && ip6.dst == %s && !ip.later_frag", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ action = "icmp6 {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst <-> ip6.src; " ++ "ip.ttl = 255; " ++ "icmp6.type = 1; " ++ "icmp6.code = 3; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 70, ds_cstr(match), action, + &op->nbrp->header_); + } + } + +- /* Drop IP traffic destined to router owned IPs. Part of it is dropped +- * in stage "lr_in_ip_input" but traffic that could have been unSNATed +- * but didn't match any existing session might still end up here. +- * +- * Priority 1. +- */ +- build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true, +- lflows); +- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) +- && strcmp(op->nbsp->type, "virtual")) { +- /* This is a logical switch port that backs a VM or a container. +- * Extract its addresses. For each of the address, go through all +- * the router ports attached to the switch (to which this port +- * connects) and if the address in question is reachable from the +- * router port, add an ARP/ND entry in that router's pipeline. */ ++ /* ICMPv6 time exceeded */ ++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { ++ /* skip link-local address */ ++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { ++ continue; ++ } + +- for (size_t i = 0; i < op->n_lsp_addrs; i++) { +- const char *ea_s = op->lsp_addrs[i].ea_s; +- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { +- const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; +- for (size_t k = 0; k < op->od->n_router_ports; k++) { +- /* Get the Logical_Router_Port that the +- * Logical_Switch_Port is connected to, as +- * 'peer'. 
*/ +- const char *peer_name = smap_get( +- &op->od->router_ports[k]->nbsp->options, +- "router-port"); +- if (!peer_name) { +- continue; +- } ++ ds_clear(match); ++ ds_clear(actions); + +- struct ovn_port *peer = ovn_port_find(ports, peer_name); +- if (!peer || !peer->nbrp) { +- continue; +- } ++ ds_put_format(match, ++ "inport == %s && ip6 && " ++ "ip6.src == %s/%d && " ++ "ip.ttl == {0, 1} && !ip.later_frag", ++ op->json_key, ++ op->lrp_networks.ipv6_addrs[i].network_s, ++ op->lrp_networks.ipv6_addrs[i].plen); ++ ds_put_format(actions, ++ "icmp6 {" ++ "eth.dst <-> eth.src; " ++ "ip6.dst = ip6.src; " ++ "ip6.src = %s; " ++ "ip.ttl = 255; " ++ "icmp6.type = 3; /* Time exceeded */ " ++ "icmp6.code = 0; /* TTL exceeded in transit */ " ++ "next; };", ++ op->lrp_networks.ipv6_addrs[i].addr_s); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); ++ } ++ } + +- if (!find_lrp_member_ip(peer, ip_s)) { +- continue; +- } ++} + +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV4 " == %s", +- peer->json_key, ip_s); ++static void ++build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od, ++ struct hmap *lflows) ++{ ++ if (od->nbr) { + +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", ea_s); +- ovn_lflow_add_with_hint(lflows, peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), +- ds_cstr(actions), +- &op->nbsp->header_); +- } ++ /* Priority-90-92 flows handle ARP requests and ND packets. Most are ++ * per logical port but DNAT addresses can be handled per datapath ++ * for non gateway router ports. ++ * ++ * Priority 91 and 92 flows are added for each gateway router ++ * port to handle the special cases. In case we get the packet ++ * on a regular port, just reply with the port's ETH address. ++ */ ++ for (int i = 0; i < od->nbr->n_nat; i++) { ++ struct ovn_nat *nat_entry = &od->nat_entries[i]; ++ ++ /* Skip entries we failed to parse. 
*/ ++ if (!nat_entry_is_valid(nat_entry)) { ++ continue; + } + +- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { +- const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; +- for (size_t k = 0; k < op->od->n_router_ports; k++) { +- /* Get the Logical_Router_Port that the +- * Logical_Switch_Port is connected to, as +- * 'peer'. */ +- const char *peer_name = smap_get( +- &op->od->router_ports[k]->nbsp->options, +- "router-port"); +- if (!peer_name) { +- continue; +- } ++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately ++ * below. ++ */ ++ if (!strcmp(nat_entry->nb->type, "snat")) { ++ continue; ++ } ++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); ++ } + +- struct ovn_port *peer = ovn_port_find(ports, peer_name); +- if (!peer || !peer->nbrp) { +- continue; +- } ++ /* Now handle SNAT entries too, one per unique SNAT IP. */ ++ struct shash_node *snat_snode; ++ SHASH_FOR_EACH (snat_snode, &od->snat_ips) { ++ struct ovn_snat_ip *snat_ip = snat_snode->data; + +- if (!find_lrp_member_ip(peer, ip_s)) { +- continue; +- } ++ if (ovs_list_is_empty(&snat_ip->snat_entries)) { ++ continue; ++ } + +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV6 " == %s", +- peer->json_key, ip_s); ++ struct ovn_nat *nat_entry = ++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), ++ struct ovn_nat, ext_addr_list_node); ++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); ++ } ++ } ++} + +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", ea_s); +- ovn_lflow_add_with_hint(lflows, peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), +- ds_cstr(actions), +- &op->nbsp->header_); +- } +- } ++/* Logical router ingress table 3: IP Input for IPv4. */ ++static void ++build_lrouter_ipv4_ip_input(struct ovn_port *op, ++ struct hmap *lflows, ++ struct ds *match, struct ds *actions) ++{ ++ /* No ingress packets are accepted on a chassisredirect ++ * port, so no need to program flows for that port. 
*/ ++ if (op->nbrp && (!op->derived)) { ++ if (op->lrp_networks.n_ipv4_addrs) { ++ /* L3 admission control: drop packets that originate from an ++ * IPv4 address owned by the router or a broadcast address ++ * known to the router (priority 100). */ ++ ds_clear(match); ++ ds_put_cstr(match, "ip4.src == "); ++ op_put_v4_networks(match, op, true); ++ ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ++ ds_cstr(match), "drop;", ++ &op->nbrp->header_); ++ ++ /* ICMP echo reply. These flows reply to ICMP echo requests ++ * received for the router's IP address. Since packets only ++ * get here as part of the logical router datapath, the inport ++ * (i.e. the incoming locally attached net) does not matter. ++ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ ++ ds_clear(match); ++ ds_put_cstr(match, "ip4.dst == "); ++ op_put_v4_networks(match, op, false); ++ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0"); ++ ++ const char * icmp_actions = "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 0; " ++ "flags.loopback = 1; " ++ "next; "; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, ++ ds_cstr(match), icmp_actions, ++ &op->nbrp->header_); + } +- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) +- && !strcmp(op->nbsp->type, "virtual")) { +- /* This is a virtual port. Add ARP replies for the virtual ip with +- * the mac of the present active virtual parent. +- * If the logical port doesn't have virtual parent set in +- * Port_Binding table, then add the flow to set eth.dst to +- * 00:00:00:00:00:00 and advance to next table so that ARP is +- * resolved by router pipeline using the arp{} action. +- * The MAC_Binding entry for the virtual ip might be invalid. 
*/ +- ovs_be32 ip; + +- const char *vip = smap_get(&op->nbsp->options, +- "virtual-ip"); +- const char *virtual_parents = smap_get(&op->nbsp->options, +- "virtual-parents"); +- if (!vip || !virtual_parents || +- !ip_parse(vip, &ip) || !op->sb) { +- return; ++ /* ICMP time exceeded */ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_clear(actions); ++ ++ ds_put_format(match, ++ "inport == %s && ip4 && " ++ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key); ++ ds_put_format(actions, ++ "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "icmp4.type = 11; /* Time exceeded */ " ++ "icmp4.code = 0; /* TTL exceeded in transit */ " ++ "ip4.dst = ip4.src; " ++ "ip4.src = %s; " ++ "ip.ttl = 255; " ++ "next; };", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, ++ ds_cstr(match), ds_cstr(actions), ++ &op->nbrp->header_); + } + +- if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] || +- !op->sb->chassis) { +- /* The virtual port is not claimed yet. */ +- for (size_t i = 0; i < op->od->n_router_ports; i++) { +- const char *peer_name = smap_get( +- &op->od->router_ports[i]->nbsp->options, +- "router-port"); +- if (!peer_name) { +- continue; +- } ++ /* ARP reply. These flows reply to ARP requests for the router's own ++ * IP address. 
*/ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, "arp.spa == %s/%u", ++ op->lrp_networks.ipv4_addrs[i].network_s, ++ op->lrp_networks.ipv4_addrs[i].plen); + +- struct ovn_port *peer = ovn_port_find(ports, peer_name); +- if (!peer || !peer->nbrp) { +- continue; ++ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer ++ && op->peer->od->n_localnet_ports) { ++ bool add_chassis_resident_check = false; ++ if (op == op->od->l3dgw_port) { ++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s ++ * should only be sent from the gateway chassis, so that ++ * upstream MAC learning points to the gateway chassis. ++ * Also need to avoid generation of multiple ARP responses ++ * from different chassis. */ ++ add_chassis_resident_check = true; ++ } else { ++ /* Check if the option 'reside-on-redirect-chassis' ++ * is set to true on the router port. If set to true ++ * and if peer's logical switch has a localnet port, it ++ * means the router pipeline for the packets from ++ * peer's logical switch is be run on the chassis ++ * hosting the gateway port and it should reply to the ++ * ARP requests for the router port IPs. 
++ */ ++ add_chassis_resident_check = smap_get_bool( ++ &op->nbrp->options, ++ "reside-on-redirect-chassis", false); + } + +- if (find_lrp_member_ip(peer, vip)) { +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV4 " == %s", +- peer->json_key, vip); +- +- const char *arp_actions = +- "eth.dst = 00:00:00:00:00:00; next;"; +- ovn_lflow_add_with_hint(lflows, peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), +- arp_actions, +- &op->nbsp->header_); +- break; ++ if (add_chassis_resident_check) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } + } +- } else { +- struct ovn_port *vp = +- ovn_port_find(ports, op->sb->virtual_parent); +- if (!vp || !vp->nbsp) { +- return; +- } + +- for (size_t i = 0; i < vp->n_lsp_addrs; i++) { +- bool found_vip_network = false; +- const char *ea_s = vp->lsp_addrs[i].ea_s; +- for (size_t j = 0; j < vp->od->n_router_ports; j++) { +- /* Get the Logical_Router_Port that the +- * Logical_Switch_Port is connected to, as +- * 'peer'. */ +- const char *peer_name = smap_get( +- &vp->od->router_ports[j]->nbsp->options, +- "router-port"); +- if (!peer_name) { +- continue; +- } ++ build_lrouter_arp_flow(op->od, op, ++ op->lrp_networks.ipv4_addrs[i].addr_s, ++ REG_INPORT_ETH_ADDR, match, false, 90, ++ &op->nbrp->header_, lflows); ++ } + +- struct ovn_port *peer = +- ovn_port_find(ports, peer_name); +- if (!peer || !peer->nbrp) { +- continue; +- } ++ /* A set to hold all load-balancer vips that need ARP responses. 
*/ ++ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); ++ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); ++ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); + +- if (!find_lrp_member_ip(peer, vip)) { +- continue; +- } +- +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV4 " == %s", +- peer->json_key, vip); ++ const char *ip_address; ++ SSET_FOR_EACH (ip_address, &all_ips_v4) { ++ ds_clear(match); ++ if (op == op->od->l3dgw_port) { ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); ++ } + +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", ea_s); +- ovn_lflow_add_with_hint(lflows, peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), +- ds_cstr(actions), +- &op->nbsp->header_); +- found_vip_network = true; +- break; +- } ++ build_lrouter_arp_flow(op->od, op, ++ ip_address, REG_INPORT_ETH_ADDR, ++ match, false, 90, NULL, lflows); ++ } + +- if (found_vip_network) { +- break; +- } ++ SSET_FOR_EACH (ip_address, &all_ips_v6) { ++ ds_clear(match); ++ if (op == op->od->l3dgw_port) { ++ ds_put_format(match, "is_chassis_resident(%s)", ++ op->od->l3redirect_port->json_key); + } ++ ++ build_lrouter_nd_flow(op->od, op, "nd_na", ++ ip_address, NULL, REG_INPORT_ETH_ADDR, ++ match, false, 90, NULL, lflows); + } +- } else if (lsp_is_router(op->nbsp)) { +- /* This is a logical switch port that connects to a router. */ + +- /* The peer of this switch port is the router port for which +- * we need to add logical flows such that it can resolve +- * ARP entries for all the other router ports connected to +- * the switch in question. */ ++ sset_destroy(&all_ips_v4); ++ sset_destroy(&all_ips_v6); + +- const char *peer_name = smap_get(&op->nbsp->options, +- "router-port"); +- if (!peer_name) { +- return; +- } ++ if (!smap_get(&op->od->nbr->options, "chassis") ++ && !op->od->l3dgw_port) { ++ /* UDP/TCP port unreachable. 
*/ ++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && udp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ const char *action = "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; " ++ "icmp4.code = 3; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); + +- struct ovn_port *peer = ovn_port_find(ports, peer_name); +- if (!peer || !peer->nbrp) { +- return; ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag && tcp", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "tcp_reset {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 80, ds_cstr(match), action, ++ &op->nbrp->header_); ++ ++ ds_clear(match); ++ ds_put_format(match, ++ "ip4 && ip4.dst == %s && !ip.later_frag", ++ op->lrp_networks.ipv4_addrs[i].addr_s); ++ action = "icmp4 {" ++ "eth.dst <-> eth.src; " ++ "ip4.dst <-> ip4.src; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; " ++ "icmp4.code = 2; " ++ "next; };"; ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, ++ 70, ds_cstr(match), action, ++ &op->nbrp->header_); ++ } + } + +- if (peer->od->nbr && +- smap_get_bool(&peer->od->nbr->options, +- "dynamic_neigh_routers", false)) { ++ /* Drop IP traffic destined to router owned IPs except if the IP is ++ * also a SNAT IP. Those are dropped later, in stage ++ * "lr_in_arp_resolve", if unSNAT was unsuccessful. ++ * ++ * Priority 60. ++ */ ++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, ++ lflows); ++ ++ /* ARP / ND handling for external IP addresses. ++ * ++ * DNAT and SNAT IP addresses are external IP addresses that need ARP ++ * handling. ++ * ++ * These are already taken care globally, per router. 
The only ++ * exception is on the l3dgw_port where we might need to use a ++ * different ETH address. ++ */ ++ if (op != op->od->l3dgw_port) { + return; + } + +- for (size_t i = 0; i < op->od->n_router_ports; i++) { +- const char *router_port_name = smap_get( +- &op->od->router_ports[i]->nbsp->options, +- "router-port"); +- struct ovn_port *router_port = ovn_port_find(ports, +- router_port_name); +- if (!router_port || !router_port->nbrp) { ++ for (size_t i = 0; i < op->od->nbr->n_nat; i++) { ++ struct ovn_nat *nat_entry = &op->od->nat_entries[i]; ++ ++ /* Skip entries we failed to parse. */ ++ if (!nat_entry_is_valid(nat_entry)) { + continue; + } + +- /* Skip the router port under consideration. */ +- if (router_port == peer) { +- continue; ++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately ++ * below. ++ */ ++ if (!strcmp(nat_entry->nb->type, "snat")) { ++ continue; + } ++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); ++ } + +- if (router_port->lrp_networks.n_ipv4_addrs) { +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV4 " == ", +- peer->json_key); +- op_put_v4_networks(match, router_port, false); ++ /* Now handle SNAT entries too, one per unique SNAT IP. 
*/ ++ struct shash_node *snat_snode; ++ SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { ++ struct ovn_snat_ip *snat_ip = snat_snode->data; + +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", +- router_port->lrp_networks.ea_s); +- ovn_lflow_add_with_hint(lflows, peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), ds_cstr(actions), +- &op->nbsp->header_); ++ if (ovs_list_is_empty(&snat_ip->snat_entries)) { ++ continue; + } + +- if (router_port->lrp_networks.n_ipv6_addrs) { +- ds_clear(match); +- ds_put_format(match, "outport == %s && " +- REG_NEXT_HOP_IPV6 " == ", +- peer->json_key); +- op_put_v6_networks(match, router_port); +- +- ds_clear(actions); +- ds_put_format(actions, "eth.dst = %s; next;", +- router_port->lrp_networks.ea_s); +- ovn_lflow_add_with_hint(lflows, peer->od, +- S_ROUTER_IN_ARP_RESOLVE, 100, +- ds_cstr(match), ds_cstr(actions), +- &op->nbsp->header_); +- } ++ struct ovn_nat *nat_entry = ++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), ++ struct ovn_nat, ext_addr_list_node); ++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); + } + } +- + } + +-/* Local router ingress table CHK_PKT_LEN: Check packet length. +- * +- * Any IPv4 packet with outport set to the distributed gateway +- * router port, check the packet length and store the result in the +- * 'REGBIT_PKT_LARGER' register bit. +- * +- * Local router ingress table LARGER_PKTS: Handle larger packets. +- * +- * Any IPv4 packet with outport set to the distributed gateway +- * router port and the 'REGBIT_PKT_LARGER' register bit is set, +- * generate ICMPv4 packet with type 3 (Destination Unreachable) and +- * code 4 (Fragmentation needed). +- * */ ++/* NAT, Defrag and load balancing. 
*/ + static void +-build_check_pkt_len_flows_for_lrouter( +- struct ovn_datapath *od, struct hmap *lflows, +- struct hmap *ports, +- struct ds *match, struct ds *actions) ++build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, ++ struct hmap *lflows, ++ struct shash *meter_groups, ++ struct hmap *lbs, ++ struct ds *match, struct ds *actions) + { + if (od->nbr) { + + /* Packets are allowed by default. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1", +- "next;"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1", +- "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); + +- if (od->l3dgw_port && od->l3redirect_port) { +- int gw_mtu = 0; +- if (od->l3dgw_port->nbrp) { +- gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options, +- "gateway_mtu", 0); +- } +- /* Add the flows only if gateway_mtu is configured. */ +- if (gw_mtu <= 0) { +- return; +- } ++ /* Send the IPv6 NS packets to next table. When ovn-controller ++ * generates IPv6 NS (for the action - nd_ns{}), the injected ++ * packet would go through conntrack - which is not required. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;"); + +- ds_clear(match); +- ds_put_format(match, "outport == %s", od->l3dgw_port->json_key); ++ /* NAT rules are only valid on Gateway routers and routers with ++ * l3dgw_port (router has a port with gateway chassis ++ * specified). 
*/ ++ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { ++ return; ++ } + +- ds_clear(actions); +- ds_put_format(actions, +- REGBIT_PKT_LARGER" = check_pkt_larger(%d);" +- " next;", gw_mtu + VLAN_ETH_HEADER_LEN); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50, +- ds_cstr(match), ds_cstr(actions), +- &od->l3dgw_port->nbrp->header_); ++ struct sset nat_entries = SSET_INITIALIZER(&nat_entries); + +- for (size_t i = 0; i < od->nbr->n_ports; i++) { +- struct ovn_port *rp = ovn_port_find(ports, +- od->nbr->ports[i]->name); +- if (!rp || rp == od->l3dgw_port) { +- continue; +- } ++ bool dnat_force_snat_ip = ++ !lport_addresses_is_empty(&od->dnat_force_snat_addrs); ++ bool lb_force_snat_ip = ++ !lport_addresses_is_empty(&od->lb_force_snat_addrs); + +- if (rp->lrp_networks.ipv4_addrs) { +- ds_clear(match); +- ds_put_format(match, "inport == %s && outport == %s" +- " && ip4 && "REGBIT_PKT_LARGER, +- rp->json_key, od->l3dgw_port->json_key); ++ for (int i = 0; i < od->nbr->n_nat; i++) { ++ const struct nbrec_nat *nat; + +- ds_clear(actions); +- /* Set icmp4.frag_mtu to gw_mtu */ +- ds_put_format(actions, +- "icmp4_error {" +- REGBIT_EGRESS_LOOPBACK" = 1; " +- "eth.dst = %s; " +- "ip4.dst = ip4.src; " +- "ip4.src = %s; " +- "ip.ttl = 255; " +- "icmp4.type = 3; /* Destination Unreachable. */ " +- "icmp4.code = 4; /* Frag Needed and DF was Set. 
*/ " +- "icmp4.frag_mtu = %d; " +- "next(pipeline=ingress, table=%d); };", +- rp->lrp_networks.ea_s, +- rp->lrp_networks.ipv4_addrs[0].addr_s, +- gw_mtu, +- ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); +- ovn_lflow_add_with_hint(lflows, od, +- S_ROUTER_IN_LARGER_PKTS, 50, +- ds_cstr(match), ds_cstr(actions), +- &rp->nbrp->header_); +- } ++ nat = od->nbr->nat[i]; + +- if (rp->lrp_networks.ipv6_addrs) { +- ds_clear(match); +- ds_put_format(match, "inport == %s && outport == %s" +- " && ip6 && "REGBIT_PKT_LARGER, +- rp->json_key, od->l3dgw_port->json_key); ++ ovs_be32 ip, mask; ++ struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT; ++ bool is_v6 = false; ++ bool stateless = lrouter_nat_is_stateless(nat); ++ struct nbrec_address_set *allowed_ext_ips = ++ nat->allowed_ext_ips; ++ struct nbrec_address_set *exempted_ext_ips = ++ nat->exempted_ext_ips; + +- ds_clear(actions); +- /* Set icmp6.frag_mtu to gw_mtu */ +- ds_put_format(actions, +- "icmp6_error {" +- REGBIT_EGRESS_LOOPBACK" = 1; " +- "eth.dst = %s; " +- "ip6.dst = ip6.src; " +- "ip6.src = %s; " +- "ip.ttl = 255; " +- "icmp6.type = 2; /* Packet Too Big. */ " +- "icmp6.code = 0; " +- "icmp6.frag_mtu = %d; " +- "next(pipeline=ingress, table=%d); };", +- rp->lrp_networks.ea_s, +- rp->lrp_networks.ipv6_addrs[0].addr_s, +- gw_mtu, +- ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); +- ovn_lflow_add_with_hint(lflows, od, +- S_ROUTER_IN_LARGER_PKTS, 50, +- ds_cstr(match), ds_cstr(actions), +- &rp->nbrp->header_); +- } ++ if (allowed_ext_ips && exempted_ext_ips) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); ++ VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since " ++ "both allowed and exempt external ips set", ++ UUID_ARGS(&(nat->header_.uuid))); ++ continue; + } +- } +- } +-} + +-/* Logical router ingress table GW_REDIRECT: Gateway redirect. 
+- * +- * For traffic with outport equal to the l3dgw_port +- * on a distributed router, this table redirects a subset +- * of the traffic to the l3redirect_port which represents +- * the central instance of the l3dgw_port. +- */ +-static void +-build_gateway_redirect_flows_for_lrouter( +- struct ovn_datapath *od, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (od->nbr) { +- if (od->l3dgw_port && od->l3redirect_port) { +- const struct ovsdb_idl_row *stage_hint = NULL; ++ char *error = ip_parse_masked(nat->external_ip, &ip, &mask); ++ if (error || mask != OVS_BE32_MAX) { ++ free(error); ++ error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); ++ if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) { ++ /* Invalid for both IPv4 and IPv6 */ ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad external ip %s for nat", ++ nat->external_ip); ++ free(error); ++ continue; ++ } ++ /* It was an invalid IPv4 address, but valid IPv6. ++ * Treat the rest of the handling of this NAT rule ++ * as IPv6. */ ++ is_v6 = true; ++ } + +- if (od->l3dgw_port->nbrp) { +- stage_hint = &od->l3dgw_port->nbrp->header_; ++ /* Check the validity of nat->logical_ip. 'logical_ip' can ++ * be a subnet when the type is "snat". 
*/ ++ int cidr_bits; ++ if (is_v6) { ++ error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6); ++ cidr_bits = ipv6_count_cidr_bits(&mask_v6); ++ } else { ++ error = ip_parse_masked(nat->logical_ip, &ip, &mask); ++ cidr_bits = ip_count_cidr_bits(mask); ++ } ++ if (!strcmp(nat->type, "snat")) { ++ if (error) { ++ /* Invalid for both IPv4 and IPv6 */ ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " ++ "in router "UUID_FMT"", ++ nat->logical_ip, UUID_ARGS(&od->key)); ++ free(error); ++ continue; ++ } ++ } else { ++ if (error || (!is_v6 && mask != OVS_BE32_MAX) ++ || (is_v6 && memcmp(&mask_v6, &v6_exact, ++ sizeof mask_v6))) { ++ /* Invalid for both IPv4 and IPv6 */ ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " ++ ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); ++ free(error); ++ continue; ++ } + } + +- /* For traffic with outport == l3dgw_port, if the +- * packet did not match any higher priority redirect +- * rule, then the traffic is redirected to the central +- * instance of the l3dgw_port. */ +- ds_clear(match); +- ds_put_format(match, "outport == %s", +- od->l3dgw_port->json_key); +- ds_clear(actions); +- ds_put_format(actions, "outport = %s; next;", +- od->l3redirect_port->json_key); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, +- ds_cstr(match), ds_cstr(actions), +- stage_hint); +- } ++ /* For distributed router NAT, determine whether this NAT rule ++ * satisfies the conditions for distributed NAT processing. 
*/ ++ bool distributed = false; ++ struct eth_addr mac; ++ if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && ++ nat->logical_port && nat->external_mac) { ++ if (eth_addr_from_string(nat->external_mac, &mac)) { ++ distributed = true; ++ } else { ++ static struct vlog_rate_limit rl = ++ VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " ++ ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); ++ continue; ++ } ++ } + +- /* Packets are allowed by default. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); +- } +-} ++ /* Ingress UNSNAT table: It is for already established connections' ++ * reverse traffic. i.e., SNAT has already been done in egress ++ * pipeline and now the packet has entered the ingress pipeline as ++ * part of a reply. We undo the SNAT here. ++ * ++ * Undoing SNAT has to happen before DNAT processing. This is ++ * because when the packet was DNATed in ingress pipeline, it did ++ * not know about the possibility of eventual additional SNAT in ++ * egress pipeline. */ ++ if (!strcmp(nat->type, "snat") ++ || !strcmp(nat->type, "dnat_and_snat")) { ++ if (!od->l3dgw_port) { ++ /* Gateway router. */ ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, "ip && ip%s.dst == %s", ++ is_v6 ? "6" : "4", ++ nat->external_ip); ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.dst=%s; next;", ++ is_v6 ? "6" : "4", nat->logical_ip); ++ } else { ++ ds_put_cstr(actions, "ct_snat;"); ++ } + +-/* Local router ingress table ARP_REQUEST: ARP request. +- * +- * In the common case where the Ethernet destination has been resolved, +- * this table outputs the packet (priority 0). Otherwise, it composes +- * and sends an ARP/IPv6 NA request (priority 100). 
*/ +-static void +-build_arp_request_flows_for_lrouter( +- struct ovn_datapath *od, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (od->nbr) { +- for (int i = 0; i < od->nbr->n_static_routes; i++) { +- const struct nbrec_logical_router_static_route *route; ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, ++ 90, ds_cstr(match), ++ ds_cstr(actions), ++ &nat->header_); ++ } else { ++ /* Distributed router. */ + +- route = od->nbr->static_routes[i]; +- struct in6_addr gw_ip6; +- unsigned int plen; +- char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen); +- if (error || plen != 128) { +- free(error); +- continue; +- } ++ /* Traffic received on l3dgw_port is subject to NAT. */ ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, "ip && ip%s.dst == %s" ++ " && inport == %s", ++ is_v6 ? "6" : "4", ++ nat->external_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. */ ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } + +- ds_clear(match); +- ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && " +- "ip6 && " REG_NEXT_HOP_IPV6 " == %s", +- route->nexthop); +- struct in6_addr sn_addr; +- struct eth_addr eth_dst; +- in6_addr_solicited_node(&sn_addr, &gw_ip6); +- ipv6_multicast_to_ethernet(ð_dst, &sn_addr); ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.dst=%s; next;", ++ is_v6 ? 
"6" : "4", nat->logical_ip); ++ } else { ++ ds_put_cstr(actions, "ct_snat;"); ++ } + +- char sn_addr_s[INET6_ADDRSTRLEN + 1]; +- ipv6_string_mapped(sn_addr_s, &sn_addr); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, ++ 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } ++ } + +- ds_clear(actions); +- ds_put_format(actions, +- "nd_ns { " +- "eth.dst = "ETH_ADDR_FMT"; " +- "ip6.dst = %s; " +- "nd.target = %s; " +- "output; " +- "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s, +- route->nexthop); +- +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200, +- ds_cstr(match), ds_cstr(actions), +- &route->header_); +- } +- +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, +- "eth.dst == 00:00:00:00:00:00 && ip4", +- "arp { " +- "eth.dst = ff:ff:ff:ff:ff:ff; " +- "arp.spa = " REG_SRC_IPV4 "; " +- "arp.tpa = " REG_NEXT_HOP_IPV4 "; " +- "arp.op = 1; " /* ARP request */ +- "output; " +- "};"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, +- "eth.dst == 00:00:00:00:00:00 && ip6", +- "nd_ns { " +- "nd.target = " REG_NEXT_HOP_IPV6 "; " +- "output; " +- "};"); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); +- } +-} +- +-/* Logical router egress table DELIVERY: Delivery (priority 100-110). +- * +- * Priority 100 rules deliver packets to enabled logical ports. +- * Priority 110 rules match multicast packets and update the source +- * mac before delivering to enabled logical ports. IP multicast traffic +- * bypasses S_ROUTER_IN_IP_ROUTING route lookups. +- */ +-static void +-build_egress_delivery_flows_for_lrouter_port( +- struct ovn_port *op, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (op->nbrp) { +- if (!lrport_is_enabled(op->nbrp)) { +- /* Drop packets to disabled logical ports (since logical flow +- * tables are default-drop). */ +- return; +- } +- +- if (op->derived) { +- /* No egress packets should be processed in the context of +- * a chassisredirect port. 
The chassisredirect port should +- * be replaced by the l3dgw port in the local output +- * pipeline stage before egress processing. */ +- return; +- } +- +- /* If multicast relay is enabled then also adjust source mac for IP +- * multicast traffic. +- */ +- if (op->od->mcast_info.rtr.relay) { +- ds_clear(match); +- ds_clear(actions); +- ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s", +- op->json_key); +- ds_put_format(actions, "eth.src = %s; output;", +- op->lrp_networks.ea_s); +- ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110, +- ds_cstr(match), ds_cstr(actions)); +- } +- +- ds_clear(match); +- ds_put_format(match, "outport == %s", op->json_key); +- ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, +- ds_cstr(match), "output;"); +- } +- +-} +- +-static void +-build_misc_local_traffic_drop_flows_for_lrouter( +- struct ovn_datapath *od, struct hmap *lflows) +-{ +- if (od->nbr) { +- /* L3 admission control: drop multicast and broadcast source, localhost +- * source or destination, and zero network source or destination +- * (priority 100). */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, +- "ip4.src_mcast ||" +- "ip4.src == 255.255.255.255 || " +- "ip4.src == 127.0.0.0/8 || " +- "ip4.dst == 127.0.0.0/8 || " +- "ip4.src == 0.0.0.0/8 || " +- "ip4.dst == 0.0.0.0/8", +- "drop;"); +- +- /* Drop ARP packets (priority 85). ARP request packets for router's own +- * IPs are handled with priority-90 flows. +- * Drop IPv6 ND packets (priority 85). ND NA packets for router's own +- * IPs are handled with priority-90 flows. +- */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, +- "arp || nd", "drop;"); +- +- /* Allow IPv6 multicast traffic that's supposed to reach the +- * router pipeline (e.g., router solicitations). +- */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra", +- "next;"); +- +- /* Drop other reserved multicast. 
*/ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83, +- "ip6.mcast_rsvd", "drop;"); +- +- /* Allow other multicast if relay enabled (priority 82). */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82, +- "ip4.mcast || ip6.mcast", +- od->mcast_info.rtr.relay ? "next;" : "drop;"); +- +- /* Drop Ethernet local broadcast. By definition this traffic should +- * not be forwarded.*/ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, +- "eth.bcast", "drop;"); ++ /* Ingress DNAT table: Packets enter the pipeline with destination ++ * IP address that needs to be DNATted from a external IP address ++ * to a logical IP address. */ ++ if (!strcmp(nat->type, "dnat") ++ || !strcmp(nat->type, "dnat_and_snat")) { ++ if (!od->l3dgw_port) { ++ /* Gateway router. */ ++ /* Packet when it goes from the initiator to destination. ++ * We need to set flags.loopback because the router can ++ * send the packet back through the same interface. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.dst == %s", ++ is_v6 ? "6" : "4", ++ nat->external_ip); ++ ds_clear(actions); ++ if (allowed_ext_ips || exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, true, mask); ++ } + +- /* TTL discard */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, +- "ip4 && ip.ttl == {0, 1}", "drop;"); ++ if (dnat_force_snat_ip) { ++ /* Indicate to the future tables that a DNAT has taken ++ * place and a force SNAT needs to be done in the ++ * Egress SNAT table. */ ++ ds_put_format(actions, ++ "flags.force_snat_for_dnat = 1; "); ++ } + +- /* Pass other traffic not already handled to the next table for +- * routing. */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); +- } +-} ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "flags.loopback = 1; " ++ "ip%s.dst=%s; next;", ++ is_v6 ? 
"6" : "4", nat->logical_ip); ++ } else { ++ ds_put_format(actions, "flags.loopback = 1; " ++ "ct_dnat(%s", nat->logical_ip); + +-static void +-build_dhcpv6_reply_flows_for_lrouter_port( +- struct ovn_port *op, struct hmap *lflows, +- struct ds *match) +-{ +- if (op->nbrp && (!op->derived)) { +- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { +- ds_clear(match); +- ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&" +- " udp.dst == 546", +- op->lrp_networks.ipv6_addrs[i].addr_s); +- ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, +- ds_cstr(match), +- "reg0 = 0; handle_dhcpv6_reply;"); +- } +- } ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", ++ nat->external_port_range); ++ } ++ ds_put_format(actions, ");"); ++ } + +-} ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } else { ++ /* Distributed router. */ + +-static void +-build_ipv6_input_flows_for_lrouter_port( +- struct ovn_port *op, struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- if (op->nbrp && (!op->derived)) { +- /* No ingress packets are accepted on a chassisredirect +- * port, so no need to program flows for that port. */ +- if (op->lrp_networks.n_ipv6_addrs) { +- /* ICMPv6 echo reply. These flows reply to echo requests +- * received for the router's IP address. */ +- ds_clear(match); +- ds_put_cstr(match, "ip6.dst == "); +- op_put_v6_networks(match, op); +- ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0"); ++ /* Traffic received on l3dgw_port is subject to NAT. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.dst == %s" ++ " && inport == %s", ++ is_v6 ? "6" : "4", ++ nat->external_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. 
*/ ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ ds_clear(actions); ++ if (allowed_ext_ips || exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, true, mask); ++ } + +- const char *lrp_actions = +- "ip6.dst <-> ip6.src; " +- "ip.ttl = 255; " +- "icmp6.type = 129; " +- "flags.loopback = 1; " +- "next; "; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, +- ds_cstr(match), lrp_actions, +- &op->nbrp->header_); +- } ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.dst=%s; next;", ++ is_v6 ? "6" : "4", nat->logical_ip); ++ } else { ++ ds_put_format(actions, "ct_dnat(%s", nat->logical_ip); ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", ++ nat->external_port_range); ++ } ++ ds_put_format(actions, ");"); ++ } + +- /* ND reply. These flows reply to ND solicitations for the +- * router's own IP address. */ +- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { +- ds_clear(match); +- if (op->od->l3dgw_port && op == op->od->l3dgw_port +- && op->od->l3redirect_port) { +- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s +- * should only be sent from the gateway chassi, so that +- * upstream MAC learning points to the gateway chassis. +- * Also need to avoid generation of multiple ND replies +- * from different chassis. 
*/ +- ds_put_format(match, "is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } + } + +- build_lrouter_nd_flow(op->od, op, "nd_na_router", +- op->lrp_networks.ipv6_addrs[i].addr_s, +- op->lrp_networks.ipv6_addrs[i].sn_addr_s, +- REG_INPORT_ETH_ADDR, match, false, 90, +- &op->nbrp->header_, lflows); +- } +- +- /* UDP/TCP port unreachable */ +- if (!smap_get(&op->od->nbr->options, "chassis") +- && !op->od->l3dgw_port) { +- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { +- ds_clear(match); +- ds_put_format(match, +- "ip6 && ip6.dst == %s && !ip.later_frag && tcp", +- op->lrp_networks.ipv6_addrs[i].addr_s); +- const char *action = "tcp_reset {" +- "eth.dst <-> eth.src; " +- "ip6.dst <-> ip6.src; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 80, ds_cstr(match), action, +- &op->nbrp->header_); +- +- ds_clear(match); +- ds_put_format(match, +- "ip6 && ip6.dst == %s && !ip.later_frag && udp", +- op->lrp_networks.ipv6_addrs[i].addr_s); +- action = "icmp6 {" +- "eth.dst <-> eth.src; " +- "ip6.dst <-> ip6.src; " +- "ip.ttl = 255; " +- "icmp6.type = 1; " +- "icmp6.code = 4; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 80, ds_cstr(match), action, +- &op->nbrp->header_); +- +- ds_clear(match); +- ds_put_format(match, +- "ip6 && ip6.dst == %s && !ip.later_frag", +- op->lrp_networks.ipv6_addrs[i].addr_s); +- action = "icmp6 {" +- "eth.dst <-> eth.src; " +- "ip6.dst <-> ip6.src; " +- "ip.ttl = 255; " +- "icmp6.type = 1; " +- "icmp6.code = 3; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 70, ds_cstr(match), action, +- &op->nbrp->header_); +- } +- } ++ /* ARP resolve for NAT IPs. 
*/ ++ if (od->l3dgw_port) { ++ if (!strcmp(nat->type, "snat")) { ++ ds_clear(match); ++ ds_put_format( ++ match, "inport == %s && %s == %s", ++ od->l3dgw_port->json_key, ++ is_v6 ? "ip6.src" : "ip4.src", nat->external_ip); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, ++ 120, ds_cstr(match), "next;", ++ &nat->header_); ++ } + +- /* ICMPv6 time exceeded */ +- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { +- /* skip link-local address */ +- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { +- continue; ++ if (!sset_contains(&nat_entries, nat->external_ip)) { ++ ds_clear(match); ++ ds_put_format( ++ match, "outport == %s && %s == %s", ++ od->l3dgw_port->json_key, ++ is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4, ++ nat->external_ip); ++ ds_clear(actions); ++ ds_put_format( ++ actions, "eth.dst = %s; next;", ++ distributed ? nat->external_mac : ++ od->l3dgw_port->lrp_networks.ea_s); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_ARP_RESOLVE, ++ 100, ds_cstr(match), ++ ds_cstr(actions), ++ &nat->header_); ++ sset_add(&nat_entries, nat->external_ip); ++ } ++ } else { ++ /* Add the NAT external_ip to the nat_entries even for ++ * gateway routers. 
This is required for adding load balancer ++ * flows.*/ ++ sset_add(&nat_entries, nat->external_ip); + } + +- ds_clear(match); +- ds_clear(actions); +- +- ds_put_format(match, +- "inport == %s && ip6 && " +- "ip6.src == %s/%d && " +- "ip.ttl == {0, 1} && !ip.later_frag", +- op->json_key, +- op->lrp_networks.ipv6_addrs[i].network_s, +- op->lrp_networks.ipv6_addrs[i].plen); +- ds_put_format(actions, +- "icmp6 {" +- "eth.dst <-> eth.src; " +- "ip6.dst = ip6.src; " +- "ip6.src = %s; " +- "ip.ttl = 255; " +- "icmp6.type = 3; /* Time exceeded */ " +- "icmp6.code = 0; /* TTL exceeded in transit */ " +- "next; };", +- op->lrp_networks.ipv6_addrs[i].addr_s); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); +- } +- } ++ /* Egress UNDNAT table: It is for already established connections' ++ * reverse traffic. i.e., DNAT has already been done in ingress ++ * pipeline and now the packet has entered the egress pipeline as ++ * part of a reply. We undo the DNAT here. ++ * ++ * Note that this only applies for NAT on a distributed router. ++ * Undo DNAT on a gateway router is done in the ingress DNAT ++ * pipeline stage. */ ++ if (od->l3dgw_port && (!strcmp(nat->type, "dnat") ++ || !strcmp(nat->type, "dnat_and_snat"))) { ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.src == %s" ++ " && outport == %s", ++ is_v6 ? "6" : "4", ++ nat->logical_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. */ ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ ds_clear(actions); ++ if (distributed) { ++ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", ++ ETH_ADDR_ARGS(mac)); ++ } + +-} ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.src=%s; next;", ++ is_v6 ? 
"6" : "4", nat->external_ip); ++ } else { ++ ds_put_format(actions, "ct_dnat;"); ++ } + +-static void +-build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od, +- struct hmap *lflows) +-{ +- if (od->nbr) { ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } + +- /* Priority-90-92 flows handle ARP requests and ND packets. Most are +- * per logical port but DNAT addresses can be handled per datapath +- * for non gateway router ports. +- * +- * Priority 91 and 92 flows are added for each gateway router +- * port to handle the special cases. In case we get the packet +- * on a regular port, just reply with the port's ETH address. +- */ +- for (int i = 0; i < od->nbr->n_nat; i++) { +- struct ovn_nat *nat_entry = &od->nat_entries[i]; ++ /* Egress SNAT table: Packets enter the egress pipeline with ++ * source ip address that needs to be SNATted to a external ip ++ * address. */ ++ if (!strcmp(nat->type, "snat") ++ || !strcmp(nat->type, "dnat_and_snat")) { ++ if (!od->l3dgw_port) { ++ /* Gateway router. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.src == %s", ++ is_v6 ? "6" : "4", ++ nat->logical_ip); ++ ds_clear(actions); + +- /* Skip entries we failed to parse. */ +- if (!nat_entry_is_valid(nat_entry)) { +- continue; +- } ++ if (allowed_ext_ips || exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, false, mask); ++ } + +- /* Skip SNAT entries for now, we handle unique SNAT IPs separately +- * below. +- */ +- if (!strcmp(nat_entry->nb->type, "snat")) { +- continue; +- } +- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); +- } ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.src=%s; next;", ++ is_v6 ? "6" : "4", nat->external_ip); ++ } else { ++ ds_put_format(actions, "ct_snat(%s", ++ nat->external_ip); + +- /* Now handle SNAT entries too, one per unique SNAT IP. 
*/ +- struct shash_node *snat_snode; +- SHASH_FOR_EACH (snat_snode, &od->snat_ips) { +- struct ovn_snat_ip *snat_ip = snat_snode->data; ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", ++ nat->external_port_range); ++ } ++ ds_put_format(actions, ");"); ++ } + +- if (ovs_list_is_empty(&snat_ip->snat_entries)) { +- continue; +- } ++ /* The priority here is calculated such that the ++ * nat->logical_ip with the longest mask gets a higher ++ * priority. */ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, ++ cidr_bits + 1, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } else { ++ uint16_t priority = cidr_bits + 1; + +- struct ovn_nat *nat_entry = +- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), +- struct ovn_nat, ext_addr_list_node); +- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows); +- } +- } +-} ++ /* Distributed router. */ ++ ds_clear(match); ++ ds_put_format(match, "ip && ip%s.src == %s" ++ " && outport == %s", ++ is_v6 ? "6" : "4", ++ nat->logical_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed && od->l3redirect_port) { ++ /* Flows for NAT rules that are centralized are only ++ * programmed on the gateway chassis. */ ++ priority += 128; ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ ds_clear(actions); + +-/* Logical router ingress table 3: IP Input for IPv4. */ +-static void +-build_lrouter_ipv4_ip_input(struct ovn_port *op, +- struct hmap *lflows, +- struct ds *match, struct ds *actions) +-{ +- /* No ingress packets are accepted on a chassisredirect +- * port, so no need to program flows for that port. */ +- if (op->nbrp && (!op->derived)) { +- if (op->lrp_networks.n_ipv4_addrs) { +- /* L3 admission control: drop packets that originate from an +- * IPv4 address owned by the router or a broadcast address +- * known to the router (priority 100). 
*/ +- ds_clear(match); +- ds_put_cstr(match, "ip4.src == "); +- op_put_v4_networks(match, op, true); +- ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, +- ds_cstr(match), "drop;", +- &op->nbrp->header_); ++ if (allowed_ext_ips || exempted_ext_ips) { ++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat, ++ is_v6, false, mask); ++ } + +- /* ICMP echo reply. These flows reply to ICMP echo requests +- * received for the router's IP address. Since packets only +- * get here as part of the logical router datapath, the inport +- * (i.e. the incoming locally attached net) does not matter. +- * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ +- ds_clear(match); +- ds_put_cstr(match, "ip4.dst == "); +- op_put_v4_networks(match, op, false); +- ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0"); ++ if (distributed) { ++ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ", ++ ETH_ADDR_ARGS(mac)); ++ } + +- const char * icmp_actions = "ip4.dst <-> ip4.src; " +- "ip.ttl = 255; " +- "icmp4.type = 0; " +- "flags.loopback = 1; " +- "next; "; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, +- ds_cstr(match), icmp_actions, +- &op->nbrp->header_); +- } ++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) { ++ ds_put_format(actions, "ip%s.src=%s; next;", ++ is_v6 ? "6" : "4", nat->external_ip); ++ } else { ++ ds_put_format(actions, "ct_snat(%s", ++ nat->external_ip); ++ if (nat->external_port_range[0]) { ++ ds_put_format(actions, ",%s", ++ nat->external_port_range); ++ } ++ ds_put_format(actions, ");"); ++ } + +- /* ICMP time exceeded */ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- ds_clear(match); +- ds_clear(actions); ++ /* The priority here is calculated such that the ++ * nat->logical_ip with the longest mask gets a higher ++ * priority. 
*/ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT, ++ priority, ds_cstr(match), ++ ds_cstr(actions), ++ &nat->header_); ++ } ++ } + +- ds_put_format(match, +- "inport == %s && ip4 && " +- "ip.ttl == {0, 1} && !ip.later_frag", op->json_key); +- ds_put_format(actions, +- "icmp4 {" +- "eth.dst <-> eth.src; " +- "icmp4.type = 11; /* Time exceeded */ " +- "icmp4.code = 0; /* TTL exceeded in transit */ " +- "ip4.dst = ip4.src; " +- "ip4.src = %s; " +- "ip.ttl = 255; " +- "next; };", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40, +- ds_cstr(match), ds_cstr(actions), +- &op->nbrp->header_); +- } ++ /* Logical router ingress table 0: ++ * For NAT on a distributed router, add rules allowing ++ * ingress traffic with eth.dst matching nat->external_mac ++ * on the l3dgw_port instance where nat->logical_port is ++ * resident. */ ++ if (distributed) { ++ /* Store the ethernet address of the port receiving the packet. ++ * This will save us from having to match on inport further ++ * down in the pipeline. ++ */ ++ ds_clear(actions); ++ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", ++ od->l3dgw_port->lrp_networks.ea_s); + +- /* ARP reply. These flows reply to ARP requests for the router's own +- * IP address. 
*/ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- ds_clear(match); +- ds_put_format(match, "arp.spa == %s/%u", +- op->lrp_networks.ipv4_addrs[i].network_s, +- op->lrp_networks.ipv4_addrs[i].plen); ++ ds_clear(match); ++ ds_put_format(match, ++ "eth.dst == "ETH_ADDR_FMT" && inport == %s" ++ " && is_chassis_resident(\"%s\")", ++ ETH_ADDR_ARGS(mac), ++ od->l3dgw_port->json_key, ++ nat->logical_port); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); ++ } + +- if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer +- && op->peer->od->n_localnet_ports) { +- bool add_chassis_resident_check = false; +- if (op == op->od->l3dgw_port) { +- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s +- * should only be sent from the gateway chassis, so that +- * upstream MAC learning points to the gateway chassis. +- * Also need to avoid generation of multiple ARP responses +- * from different chassis. */ +- add_chassis_resident_check = true; ++ /* Ingress Gateway Redirect Table: For NAT on a distributed ++ * router, add flows that are specific to a NAT rule. These ++ * flows indicate the presence of an applicable NAT rule that ++ * can be applied in a distributed manner. ++ * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to ++ * NAT external IP and NAT external mac so the ARP request ++ * generated in the following stage is sent out with proper IP/MAC ++ * src addresses. ++ */ ++ if (distributed) { ++ ds_clear(match); ++ ds_clear(actions); ++ ds_put_format(match, ++ "ip%s.src == %s && outport == %s && " ++ "is_chassis_resident(\"%s\")", ++ is_v6 ? "6" : "4", nat->logical_ip, ++ od->l3dgw_port->json_key, nat->logical_port); ++ ds_put_format(actions, "eth.src = %s; %s = %s; next;", ++ nat->external_mac, ++ is_v6 ? 
REG_SRC_IPV6 : REG_SRC_IPV4, ++ nat->external_ip); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, ++ 100, ds_cstr(match), ++ ds_cstr(actions), &nat->header_); ++ } ++ ++ /* Egress Loopback table: For NAT on a distributed router. ++ * If packets in the egress pipeline on the distributed ++ * gateway port have ip.dst matching a NAT external IP, then ++ * loop a clone of the packet back to the beginning of the ++ * ingress pipeline with inport = outport. */ ++ if (od->l3dgw_port) { ++ /* Distributed router. */ ++ ds_clear(match); ++ ds_put_format(match, "ip%s.dst == %s && outport == %s", ++ is_v6 ? "6" : "4", ++ nat->external_ip, ++ od->l3dgw_port->json_key); ++ if (!distributed) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); + } else { +- /* Check if the option 'reside-on-redirect-chassis' +- * is set to true on the router port. If set to true +- * and if peer's logical switch has a localnet port, it +- * means the router pipeline for the packets from +- * peer's logical switch is be run on the chassis +- * hosting the gateway port and it should reply to the +- * ARP requests for the router port IPs. 
+- */ +- add_chassis_resident_check = smap_get_bool( +- &op->nbrp->options, +- "reside-on-redirect-chassis", false); ++ ds_put_format(match, " && is_chassis_resident(\"%s\")", ++ nat->logical_port); + } + +- if (add_chassis_resident_check) { +- ds_put_format(match, " && is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); ++ ds_clear(actions); ++ ds_put_format(actions, ++ "clone { ct_clear; " ++ "inport = outport; outport = \"\"; " ++ "flags = 0; flags.loopback = 1; "); ++ for (int j = 0; j < MFF_N_LOG_REGS; j++) { ++ ds_put_format(actions, "reg%d = 0; ", j); + } ++ ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; " ++ "next(pipeline=ingress, table=%d); };", ++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, ++ ds_cstr(match), ds_cstr(actions), ++ &nat->header_); + } +- +- build_lrouter_arp_flow(op->od, op, +- op->lrp_networks.ipv4_addrs[i].addr_s, +- REG_INPORT_ETH_ADDR, match, false, 90, +- &op->nbrp->header_, lflows); + } + +- /* A set to hold all load-balancer vips that need ARP responses. */ +- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); +- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); +- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); +- +- const char *ip_address; +- SSET_FOR_EACH (ip_address, &all_ips_v4) { +- ds_clear(match); +- if (op == op->od->l3dgw_port) { +- ds_put_format(match, "is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); ++ /* Handle force SNAT options set in the gateway router. 
*/ ++ if (!od->l3dgw_port) { ++ if (dnat_force_snat_ip) { ++ if (od->dnat_force_snat_addrs.n_ipv4_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "4", ++ od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s, ++ "dnat"); ++ } ++ if (od->dnat_force_snat_addrs.n_ipv6_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "6", ++ od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s, ++ "dnat"); ++ } + } +- +- build_lrouter_arp_flow(op->od, op, +- ip_address, REG_INPORT_ETH_ADDR, +- match, false, 90, NULL, lflows); +- } +- +- SSET_FOR_EACH (ip_address, &all_ips_v6) { +- ds_clear(match); +- if (op == op->od->l3dgw_port) { +- ds_put_format(match, "is_chassis_resident(%s)", +- op->od->l3redirect_port->json_key); ++ if (lb_force_snat_ip) { ++ if (od->lb_force_snat_addrs.n_ipv4_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "4", ++ od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); ++ } ++ if (od->lb_force_snat_addrs.n_ipv6_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "6", ++ od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); ++ } + } + +- build_lrouter_nd_flow(op->od, op, "nd_na", +- ip_address, NULL, REG_INPORT_ETH_ADDR, +- match, false, 90, NULL, lflows); ++ /* For gateway router, re-circulate every packet through ++ * the DNAT zone. This helps with the following. ++ * ++ * Any packet that needs to be unDNATed in the reverse ++ * direction gets unDNATed. Ideally this could be done in ++ * the egress pipeline. But since the gateway router ++ * does not have any feature that depends on the source ++ * ip address being external IP address for IP routing, ++ * we can do it here, saving a future re-circulation. */ ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, ++ "ip", "flags.loopback = 1; ct_dnat;"); + } + +- sset_destroy(&all_ips_v4); +- sset_destroy(&all_ips_v6); +- +- if (!smap_get(&op->od->nbr->options, "chassis") +- && !op->od->l3dgw_port) { +- /* UDP/TCP port unreachable. 
*/ +- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { +- ds_clear(match); +- ds_put_format(match, +- "ip4 && ip4.dst == %s && !ip.later_frag && udp", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- const char *action = "icmp4 {" +- "eth.dst <-> eth.src; " +- "ip4.dst <-> ip4.src; " +- "ip.ttl = 255; " +- "icmp4.type = 3; " +- "icmp4.code = 3; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 80, ds_cstr(match), action, +- &op->nbrp->header_); +- +- ds_clear(match); +- ds_put_format(match, +- "ip4 && ip4.dst == %s && !ip.later_frag && tcp", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- action = "tcp_reset {" +- "eth.dst <-> eth.src; " +- "ip4.dst <-> ip4.src; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 80, ds_cstr(match), action, +- &op->nbrp->header_); +- +- ds_clear(match); +- ds_put_format(match, +- "ip4 && ip4.dst == %s && !ip.later_frag", +- op->lrp_networks.ipv4_addrs[i].addr_s); +- action = "icmp4 {" +- "eth.dst <-> eth.src; " +- "ip4.dst <-> ip4.src; " +- "ip.ttl = 255; " +- "icmp4.type = 3; " +- "icmp4.code = 2; " +- "next; };"; +- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, +- 70, ds_cstr(match), action, +- &op->nbrp->header_); +- } ++ /* Load balancing and packet defrag are only valid on ++ * Gateway routers or router with gateway port. */ ++ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { ++ sset_destroy(&nat_entries); ++ return; + } + +- /* Drop IP traffic destined to router owned IPs except if the IP is +- * also a SNAT IP. Those are dropped later, in stage +- * "lr_in_arp_resolve", if unSNAT was unsuccessful. +- * +- * Priority 60. +- */ +- build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false, +- lflows); ++ /* A set to hold all ips that need defragmentation and tracking. */ ++ struct sset all_ips = SSET_INITIALIZER(&all_ips); + +- /* ARP / ND handling for external IP addresses. 
+- * +- * DNAT and SNAT IP addresses are external IP addresses that need ARP +- * handling. +- * +- * These are already taken care globally, per router. The only +- * exception is on the l3dgw_port where we might need to use a +- * different ETH address. +- */ +- if (op != op->od->l3dgw_port) { +- return; +- } ++ for (int i = 0; i < od->nbr->n_load_balancer; i++) { ++ struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i]; ++ struct ovn_northd_lb *lb = ++ ovn_northd_lb_find(lbs, &nb_lb->header_.uuid); ++ ovs_assert(lb); + +- for (size_t i = 0; i < op->od->nbr->n_nat; i++) { +- struct ovn_nat *nat_entry = &op->od->nat_entries[i]; ++ for (size_t j = 0; j < lb->n_vips; j++) { ++ struct ovn_lb_vip *lb_vip = &lb->vips[j]; ++ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; ++ ds_clear(actions); ++ build_lb_vip_actions(lb_vip, lb_vip_nb, actions, ++ lb->selection_fields, false); + +- /* Skip entries we failed to parse. */ +- if (!nat_entry_is_valid(nat_entry)) { +- continue; +- } ++ if (!sset_contains(&all_ips, lb_vip->vip_str)) { ++ sset_add(&all_ips, lb_vip->vip_str); ++ /* If there are any load balancing rules, we should send ++ * the packet to conntrack for defragmentation and ++ * tracking. This helps with two things. ++ * ++ * 1. With tracking, we can send only new connections to ++ * pick a DNAT ip address from a group. ++ * 2. If there are L4 ports in load balancing rules, we ++ * need the defragmentation to match on L4 ports. */ ++ ds_clear(match); ++ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { ++ ds_put_format(match, "ip && ip4.dst == %s", ++ lb_vip->vip_str); ++ } else { ++ ds_put_format(match, "ip && ip6.dst == %s", ++ lb_vip->vip_str); ++ } ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, ++ 100, ds_cstr(match), "ct_next;", ++ &nb_lb->header_); ++ } + +- /* Skip SNAT entries for now, we handle unique SNAT IPs separately +- * below. 
+- */ +- if (!strcmp(nat_entry->nb->type, "snat")) { +- continue; +- } +- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); +- } ++ /* Higher priority rules are added for load-balancing in DNAT ++ * table. For every match (on a VIP[:port]), we add two flows ++ * via add_router_lb_flow(). One flow is for specific matching ++ * on ct.new with an action of "ct_lb($targets);". The other ++ * flow is for ct.est with an action of "ct_dnat;". */ ++ ds_clear(match); ++ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { ++ ds_put_format(match, "ip && ip4.dst == %s", ++ lb_vip->vip_str); ++ } else { ++ ds_put_format(match, "ip && ip6.dst == %s", ++ lb_vip->vip_str); ++ } + +- /* Now handle SNAT entries too, one per unique SNAT IP. */ +- struct shash_node *snat_snode; +- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) { +- struct ovn_snat_ip *snat_ip = snat_snode->data; ++ int prio = 110; ++ bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); ++ bool is_sctp = nullable_string_is_equal(nb_lb->protocol, ++ "sctp"); ++ const char *proto = is_udp ? "udp" : is_sctp ? 
"sctp" : "tcp"; + +- if (ovs_list_is_empty(&snat_ip->snat_entries)) { +- continue; +- } ++ if (lb_vip->vip_port) { ++ ds_put_format(match, " && %s && %s.dst == %d", proto, ++ proto, lb_vip->vip_port); ++ prio = 120; ++ } + +- struct ovn_nat *nat_entry = +- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries), +- struct ovn_nat, ext_addr_list_node); +- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows); ++ if (od->l3redirect_port && ++ (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { ++ ds_put_format(match, " && is_chassis_resident(%s)", ++ od->l3redirect_port->json_key); ++ } ++ add_router_lb_flow(lflows, od, match, actions, prio, ++ lb_force_snat_ip, lb_vip, proto, ++ nb_lb, meter_groups, &nat_entries); ++ } + } ++ sset_destroy(&all_ips); ++ sset_destroy(&nat_entries); + } + } + + ++ + struct lswitch_flow_build_info { + struct hmap *datapaths; + struct hmap *ports; +@@ -11361,6 +11350,8 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + &lsi->actions); + build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows); + build_lrouter_arp_nd_for_datapath(od, lsi->lflows); ++ build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->meter_groups, ++ lsi->lbs, &lsi->match, &lsi->actions); + } + + /* Helper function to combine all lflow generation which is iterated by port. +@@ -11459,9 +11450,6 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + ds_destroy(&lsi.actions); + + build_lswitch_flows(datapaths, lflows); +- +- /* Legacy lrouter build - to be migrated. 
*/ +- build_lrouter_flows(datapaths, lflows, meter_groups, lbs); + } + + struct ovn_dp_group { +-- +2.29.2 + diff --git a/SOURCES/0012-controller-introduce-BFD-tx-path-in-ovn-controller.patch b/SOURCES/0012-controller-introduce-BFD-tx-path-in-ovn-controller.patch new file mode 100644 index 0000000..6a48950 --- /dev/null +++ b/SOURCES/0012-controller-introduce-BFD-tx-path-in-ovn-controller.patch @@ -0,0 +1,967 @@ +From 2473b80f778654f0204d1cf4671e543cb6467d5f Mon Sep 17 00:00:00 2001 +Message-Id: <2473b80f778654f0204d1cf4671e543cb6467d5f.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Lorenzo Bianconi +Date: Fri, 8 Jan 2021 17:36:20 +0100 +Subject: [PATCH 12/16] controller: introduce BFD tx path in ovn-controller. + +Introduce the capability to transmit BFD packets in ovn-controller. +Introduce BFD tables in nb/sb dbs in order to configure BFD parameters +(e.g. min_tx, min_rx, ..) for ovn-controller. + +Acked-by: Mark Michelson +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/ovn-controller.c | 1 + + controller/pinctrl.c | 298 +++++++++++++++++++++++++++++++++++- + controller/pinctrl.h | 2 + + lib/ovn-l7.h | 19 +++ + northd/ovn-northd.c | 202 ++++++++++++++++++++++++ + ovn-nb.ovsschema | 29 +++- + ovn-nb.xml | 67 ++++++++ + ovn-sb.ovsschema | 27 +++- + ovn-sb.xml | 78 ++++++++++ + 9 files changed, 718 insertions(+), 5 deletions(-) + +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 366fc9c06..75512871b 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -2837,6 +2837,7 @@ main(int argc, char *argv[]) + ovnsb_idl_loop.idl), + sbrec_service_monitor_table_get( + ovnsb_idl_loop.idl), ++ sbrec_bfd_table_get(ovnsb_idl_loop.idl), + br_int, chassis, + &runtime_data->local_datapaths, + &runtime_data->active_tunnels); +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index 7e3abf0a4..9df6533a1 100644 +--- a/controller/pinctrl.c ++++ 
b/controller/pinctrl.c +@@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, + static void notify_pinctrl_main(void); + static void notify_pinctrl_handler(void); + ++static bool bfd_monitor_should_inject(void); ++static void bfd_monitor_wait(long long int timeout); ++static void bfd_monitor_init(void); ++static void bfd_monitor_destroy(void); ++static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) ++ OVS_REQUIRES(pinctrl_mutex); ++static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, ++ struct ovsdb_idl_index *sbrec_port_binding_by_name, ++ const struct sbrec_chassis *chassis, ++ const struct sset *active_tunnels) ++ OVS_REQUIRES(pinctrl_mutex); ++ + COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); + COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); + COVERAGE_DEFINE(pinctrl_drop_controller_event); +@@ -487,6 +499,7 @@ pinctrl_init(void) + ip_mcast_snoop_init(); + init_put_vport_bindings(); + init_svc_monitors(); ++ bfd_monitor_init(); + pinctrl.br_int_name = NULL; + pinctrl_handler_seq = seq_create(); + pinctrl_main_seq = seq_create(); +@@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_) + swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); + + while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { ++ long long int bfd_time = LLONG_MAX; ++ + ovs_mutex_lock(&pinctrl_mutex); + pinctrl_rconn_setup(swconn, pctrl->br_int_name); + ip_mcast_snoop_run(); +@@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_) + send_ipv6_ras(swconn, &send_ipv6_ra_time); + send_ipv6_prefixd(swconn, &send_prefixd_time); + send_mac_binding_buffered_pkts(swconn); ++ bfd_monitor_send_msg(swconn, &bfd_time); + ovs_mutex_unlock(&pinctrl_mutex); + + ip_mcast_querier_run(swconn, &send_mcast_query_time); +@@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_) + ip_mcast_querier_wait(send_mcast_query_time); + svc_monitors_wait(svc_monitors_next_run_time); + ipv6_prefixd_wait(send_prefixd_time); ++ bfd_monitor_wait(bfd_time); + 
+ new_seq = seq_read(pinctrl_handler_seq); + seq_wait(pinctrl_handler_seq, new_seq); +@@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + const struct sbrec_dns_table *dns_table, + const struct sbrec_controller_event_table *ce_table, + const struct sbrec_service_monitor_table *svc_mon_table, ++ const struct sbrec_bfd_table *bfd_table, + const struct ovsrec_bridge *br_int, + const struct sbrec_chassis *chassis, + const struct hmap *local_datapaths, +@@ -3179,6 +3197,10 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + local_datapaths); + sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, + chassis); ++ if (ovnsb_idl_txn) { ++ bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, ++ active_tunnels); ++ } + ovs_mutex_unlock(&pinctrl_mutex); + } + +@@ -3722,6 +3744,7 @@ pinctrl_destroy(void) + destroy_dns_cache(); + ip_mcast_snoop_destroy(); + destroy_svc_monitors(); ++ bfd_monitor_destroy(); + seq_destroy(pinctrl_main_seq); + seq_destroy(pinctrl_handler_seq); + } +@@ -5525,7 +5548,8 @@ may_inject_pkts(void) + !shash_is_empty(&send_garp_rarp_data) || + ipv6_prefixd_should_inject() || + !ovs_list_is_empty(&mcast_query_list) || +- !ovs_list_is_empty(&buffered_mac_bindings)); ++ !ovs_list_is_empty(&buffered_mac_bindings) || ++ bfd_monitor_should_inject()); + } + + static void +@@ -6312,6 +6336,278 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, + + } + ++static struct hmap bfd_monitor_map; ++ ++struct bfd_entry { ++ struct hmap_node node; ++ bool erase; ++ ++ /* L2 source address */ ++ struct eth_addr src_mac; ++ /* IPv4 source address */ ++ ovs_be32 ip_src; ++ /* IPv4 destination address */ ++ ovs_be32 ip_dst; ++ /* RFC 5881 section 4 ++ * The source port MUST be in the range 49152 through 65535. ++ * The same UDP source port number MUST be used for all BFD ++ * Control packets associated with a particular session. 
++ * The source port number SHOULD be unique among all BFD ++ * sessions on the system ++ */ ++ uint16_t udp_src; ++ ovs_be32 disc; ++ ++ int64_t port_key; ++ int64_t metadata; ++ ++ long long int next_tx; ++}; ++ ++static void ++bfd_monitor_init(void) ++{ ++ hmap_init(&bfd_monitor_map); ++} ++ ++static void ++bfd_monitor_destroy(void) ++{ ++ struct bfd_entry *entry; ++ HMAP_FOR_EACH_POP (entry, node, &bfd_monitor_map) { ++ free(entry); ++ } ++ hmap_destroy(&bfd_monitor_map); ++} ++ ++static struct bfd_entry * ++pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) ++{ ++ struct bfd_entry *entry; ++ HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0), ++ &bfd_monitor_map) { ++ if (entry->udp_src == port) { ++ return entry; ++ } ++ } ++ return NULL; ++} ++ ++static bool ++bfd_monitor_should_inject(void) ++{ ++ long long int cur_time = time_msec(); ++ struct bfd_entry *entry; ++ ++ HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { ++ if (entry->next_tx < cur_time) { ++ return true; ++ } ++ } ++ return false; ++} ++ ++static void ++bfd_monitor_wait(long long int timeout) ++{ ++ if (!hmap_is_empty(&bfd_monitor_map)) { ++ poll_timer_wait_until(timeout); ++ } ++} ++ ++static void ++bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) ++{ ++ struct udp_header *udp; ++ struct bfd_msg *msg; ++ ++ /* Properly align after the ethernet header */ ++ dp_packet_reserve(packet, 2); ++ struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth); ++ eth->eth_dst = eth_addr_broadcast; ++ eth->eth_src = entry->src_mac; ++ eth->eth_type = htons(ETH_TYPE_IP); ++ ++ struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip); ++ ip->ip_ihl_ver = IP_IHL_VER(5, 4); ++ ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); ++ ip->ip_ttl = MAXTTL; ++ ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; ++ ip->ip_proto = IPPROTO_UDP; ++ put_16aligned_be32(&ip->ip_src, entry->ip_src); ++ put_16aligned_be32(&ip->ip_dst, entry->ip_dst); ++ /* Checksum 
has already been zeroed by put_zeros call. */ ++ ip->ip_csum = csum(ip, sizeof *ip); ++ ++ udp = dp_packet_put_zeros(packet, sizeof *udp); ++ udp->udp_src = htons(entry->udp_src); ++ udp->udp_dst = htons(BFD_DEST_PORT); ++ udp->udp_len = htons(sizeof *udp + sizeof *msg); ++ ++ msg = dp_packet_put_uninit(packet, sizeof *msg); ++ msg->vers_diag = (BFD_VERSION << 5); ++ msg->length = BFD_PACKET_LEN; ++} ++ ++static void ++bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) ++ OVS_REQUIRES(pinctrl_mutex) ++{ ++ long long int cur_time = time_msec(); ++ struct bfd_entry *entry; ++ ++ HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { ++ if (cur_time < entry->next_tx) { ++ goto next; ++ } ++ ++ uint64_t packet_stub[256 / 8]; ++ struct dp_packet packet; ++ dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); ++ bfd_monitor_put_bfd_msg(entry, &packet); ++ ++ uint64_t ofpacts_stub[4096 / 8]; ++ struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); ++ ++ /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. 
*/ ++ uint32_t dp_key = entry->metadata; ++ uint32_t port_key = entry->port_key; ++ put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); ++ put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); ++ put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); ++ struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); ++ resubmit->in_port = OFPP_CONTROLLER; ++ resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; ++ ++ struct ofputil_packet_out po = { ++ .packet = dp_packet_data(&packet), ++ .packet_len = dp_packet_size(&packet), ++ .buffer_id = UINT32_MAX, ++ .ofpacts = ofpacts.data, ++ .ofpacts_len = ofpacts.size, ++ }; ++ ++ match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); ++ enum ofp_version version = rconn_get_version(swconn); ++ enum ofputil_protocol proto = ++ ofputil_protocol_from_ofp_version(version); ++ queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); ++ dp_packet_uninit(&packet); ++ ofpbuf_uninit(&ofpacts); ++ ++ entry->next_tx = cur_time + 5000; ++next: ++ if (*bfd_time > entry->next_tx) { ++ *bfd_time = entry->next_tx; ++ } ++ } ++} ++ ++static void ++bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, ++ struct ovsdb_idl_index *sbrec_port_binding_by_name, ++ const struct sbrec_chassis *chassis, ++ const struct sset *active_tunnels) ++ OVS_REQUIRES(pinctrl_mutex) ++{ ++ struct bfd_entry *entry, *next_entry; ++ long long int cur_time = time_msec(); ++ bool changed = false; ++ ++ HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { ++ entry->erase = true; ++ } ++ ++ const struct sbrec_bfd *bt; ++ SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) { ++ const struct sbrec_port_binding *pb ++ = lport_lookup_by_name(sbrec_port_binding_by_name, ++ bt->logical_port); ++ if (!pb) { ++ continue; ++ } ++ ++ const char *peer_s = smap_get(&pb->options, "peer"); ++ if (!peer_s) { ++ continue; ++ } ++ ++ const struct sbrec_port_binding *peer ++ = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); ++ if (!peer) { ++ continue; ++ } ++ ++ char 
*redirect_name = xasprintf("cr-%s", pb->logical_port); ++ bool resident = lport_is_chassis_resident( ++ sbrec_port_binding_by_name, chassis, active_tunnels, ++ redirect_name); ++ free(redirect_name); ++ if ((strcmp(pb->type, "l3gateway") || pb->chassis != chassis) && ++ !resident) { ++ continue; ++ } ++ ++ entry = pinctrl_find_bfd_monitor_entry_by_port( ++ bt->dst_ip, bt->src_port); ++ if (!entry) { ++ ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP); ++ struct eth_addr ea = eth_addr_zero; ++ int i; ++ ++ if (!ip_parse(bt->dst_ip, &ip_dst)) { ++ continue; ++ } ++ ++ for (i = 0; i < pb->n_mac; i++) { ++ struct lport_addresses laddrs; ++ ++ if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { ++ continue; ++ } ++ ++ ea = laddrs.ea; ++ if (laddrs.n_ipv4_addrs > 0) { ++ ip_src = laddrs.ipv4_addrs[0].addr; ++ destroy_lport_addresses(&laddrs); ++ break; ++ } ++ destroy_lport_addresses(&laddrs); ++ } ++ ++ if (eth_addr_is_zero(ea)) { ++ continue; ++ } ++ ++ entry = xzalloc(sizeof *entry); ++ entry->src_mac = ea; ++ entry->ip_src = ip_src; ++ entry->ip_dst = ip_dst; ++ entry->udp_src = bt->src_port; ++ entry->disc = htonl(bt->disc); ++ entry->next_tx = cur_time; ++ entry->metadata = pb->datapath->tunnel_key; ++ entry->port_key = pb->tunnel_key; ++ ++ uint32_t hash = hash_string(bt->dst_ip, 0); ++ hmap_insert(&bfd_monitor_map, &entry->node, hash); ++ changed = true; ++ } ++ entry->erase = false; ++ } ++ ++ HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) { ++ if (entry->erase) { ++ hmap_remove(&bfd_monitor_map, &entry->node); ++ free(entry); ++ } ++ } ++ ++ if (changed) { ++ notify_pinctrl_handler(); ++ } ++} ++ + static uint16_t + get_random_src_port(void) + { +diff --git a/controller/pinctrl.h b/controller/pinctrl.h +index 4b101ec92..8555d983d 100644 +--- a/controller/pinctrl.h ++++ b/controller/pinctrl.h +@@ -31,6 +31,7 @@ struct sbrec_chassis; + struct sbrec_dns_table; + struct sbrec_controller_event_table; + struct sbrec_service_monitor_table; ++struct 
sbrec_bfd_table; + + void pinctrl_init(void); + void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, +@@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + const struct sbrec_dns_table *, + const struct sbrec_controller_event_table *, + const struct sbrec_service_monitor_table *, ++ const struct sbrec_bfd_table *, + const struct ovsrec_bridge *, const struct sbrec_chassis *, + const struct hmap *local_datapaths, + const struct sset *active_tunnels); +diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h +index c84a0e7a9..d00982449 100644 +--- a/lib/ovn-l7.h ++++ b/lib/ovn-l7.h +@@ -26,6 +26,25 @@ + #include "hash.h" + #include "ovn/logical-fields.h" + ++#define BFD_PACKET_LEN 24 ++#define BFD_DEST_PORT 3784 ++#define BFD_VERSION 1 ++#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ ++#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ ++ ++struct bfd_msg { ++ uint8_t vers_diag; ++ uint8_t flags; ++ uint8_t mult; ++ uint8_t length; ++ ovs_be32 my_disc; ++ ovs_be32 your_disc; ++ ovs_be32 min_tx; ++ ovs_be32 min_rx; ++ ovs_be32 min_rx_echo; ++}; ++BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg)); ++ + /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. 
*/ + struct gen_opts_map { + struct hmap_node hmap_node; +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index f588d8c32..77ea2181c 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -7487,6 +7487,191 @@ build_lswitch_ip_unicast_lookup(struct ovn_port *op, + } + } + ++struct bfd_entry { ++ struct hmap_node hmap_node; ++ ++ const struct sbrec_bfd *sb_bt; ++ ++ bool ref; ++}; ++ ++static struct bfd_entry * ++bfd_port_lookup(struct hmap *bfd_map, const char *logical_port, ++ const char *dst_ip) ++{ ++ struct bfd_entry *bfd_e; ++ uint32_t hash; ++ ++ hash = hash_string(dst_ip, 0); ++ hash = hash_string(logical_port, hash); ++ HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) { ++ if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) && ++ !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) { ++ return bfd_e; ++ } ++ } ++ return NULL; ++} ++ ++static void ++bfd_cleanup_connections(struct northd_context *ctx, struct hmap *bfd_map) ++{ ++ const struct nbrec_bfd *nb_bt; ++ struct bfd_entry *bfd_e; ++ ++ NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { ++ bfd_e = bfd_port_lookup(bfd_map, nb_bt->logical_port, nb_bt->dst_ip); ++ if (!bfd_e) { ++ continue; ++ } ++ ++ if (!bfd_e->ref && strcmp(nb_bt->status, "admin_down")) { ++ /* no user for this bfd connection */ ++ nbrec_bfd_set_status(nb_bt, "admin_down"); ++ } ++ } ++ ++ HMAP_FOR_EACH_POP (bfd_e, hmap_node, bfd_map) { ++ free(bfd_e); ++ } ++} ++ ++#define BFD_DEF_MINTX 1000 /* 1s */ ++#define BFD_DEF_MINRX 1000 /* 1s */ ++#define BFD_DEF_DETECT_MULT 5 ++ ++static void ++build_bfd_update_sb_conf(const struct nbrec_bfd *nb_bt, ++ const struct sbrec_bfd *sb_bt) ++{ ++ if (strcmp(nb_bt->dst_ip, sb_bt->dst_ip)) { ++ sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); ++ } ++ ++ if (strcmp(nb_bt->logical_port, sb_bt->logical_port)) { ++ sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); ++ } ++ ++ if (strcmp(nb_bt->status, sb_bt->status)) { ++ sbrec_bfd_set_status(sb_bt, nb_bt->status); ++ } ++ ++ int detect_mult = 
nb_bt->n_detect_mult ? nb_bt->detect_mult[0] ++ : BFD_DEF_DETECT_MULT; ++ if (detect_mult != sb_bt->detect_mult) { ++ sbrec_bfd_set_detect_mult(sb_bt, detect_mult); ++ } ++ ++ int min_tx = nb_bt->n_min_tx ? nb_bt->min_tx[0] : BFD_DEF_MINTX; ++ if (min_tx != sb_bt->min_tx) { ++ sbrec_bfd_set_min_tx(sb_bt, min_tx); ++ } ++ ++ int min_rx = nb_bt->n_min_rx ? nb_bt->min_rx[0] : BFD_DEF_MINRX; ++ if (min_rx != sb_bt->min_rx) { ++ sbrec_bfd_set_min_rx(sb_bt, min_rx); ++ } ++} ++ ++/* RFC 5881 section 4 ++ * The source port MUST be in the range 49152 through 65535. ++ * The same UDP source port number MUST be used for all BFD ++ * Control packets associated with a particular session. ++ * The source port number SHOULD be unique among all BFD ++ * sessions on the system ++ */ ++#define BFD_UDP_SRC_PORT_START 49152 ++#define BFD_UDP_SRC_PORT_LEN (65535 - BFD_UDP_SRC_PORT_START) ++ ++static int bfd_get_unused_port(unsigned long *bfd_src_ports) ++{ ++ int port; ++ ++ port = bitmap_scan(bfd_src_ports, 0, 0, BFD_UDP_SRC_PORT_LEN); ++ if (port == BFD_UDP_SRC_PORT_LEN) { ++ return -ENOSPC; ++ } ++ bitmap_set1(bfd_src_ports, port); ++ ++ return port + BFD_UDP_SRC_PORT_START; ++} ++ ++static void ++build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections) ++{ ++ struct hmap sb_only = HMAP_INITIALIZER(&sb_only); ++ const struct sbrec_bfd *sb_bt; ++ unsigned long *bfd_src_ports; ++ struct bfd_entry *bfd_e; ++ uint32_t hash; ++ ++ bfd_src_ports = bitmap_allocate(BFD_UDP_SRC_PORT_LEN); ++ ++ SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) { ++ bfd_e = xmalloc(sizeof *bfd_e); ++ bfd_e->sb_bt = sb_bt; ++ hash = hash_string(sb_bt->dst_ip, 0); ++ hash = hash_string(sb_bt->logical_port, hash); ++ hmap_insert(&sb_only, &bfd_e->hmap_node, hash); ++ bitmap_set1(bfd_src_ports, sb_bt->src_port - BFD_UDP_SRC_PORT_START); ++ } ++ ++ const struct nbrec_bfd *nb_bt; ++ NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) { ++ if (!nb_bt->status) { ++ /* default state is admin_down */ ++ 
nbrec_bfd_set_status(nb_bt, "admin_down"); ++ } ++ ++ bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip); ++ if (!bfd_e) { ++ int udp_src = bfd_get_unused_port(bfd_src_ports); ++ if (udp_src < 0) { ++ continue; ++ } ++ ++ sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn); ++ sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port); ++ sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip); ++ sbrec_bfd_set_disc(sb_bt, 1 + random_uint32()); ++ sbrec_bfd_set_src_port(sb_bt, udp_src); ++ sbrec_bfd_set_status(sb_bt, nb_bt->status); ++ ++ int min_tx = nb_bt->n_min_tx ? nb_bt->min_tx[0] : BFD_DEF_MINTX; ++ sbrec_bfd_set_min_tx(sb_bt, min_tx); ++ int min_rx = nb_bt->n_min_rx ? nb_bt->min_rx[0] : BFD_DEF_MINRX; ++ sbrec_bfd_set_min_rx(sb_bt, min_rx); ++ int d_mult = nb_bt->n_detect_mult ? nb_bt->detect_mult[0] ++ : BFD_DEF_DETECT_MULT; ++ sbrec_bfd_set_detect_mult(sb_bt, d_mult); ++ } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) { ++ if (!strcmp(nb_bt->status, "admin_down") || ++ !strcmp(bfd_e->sb_bt->status, "admin_down")) { ++ sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status); ++ } else { ++ nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status); ++ } ++ } ++ if (bfd_e) { ++ build_bfd_update_sb_conf(nb_bt, bfd_e->sb_bt); ++ ++ hmap_remove(&sb_only, &bfd_e->hmap_node); ++ bfd_e->ref = false; ++ hash = hash_string(bfd_e->sb_bt->dst_ip, 0); ++ hash = hash_string(bfd_e->sb_bt->logical_port, hash); ++ hmap_insert(bfd_connections, &bfd_e->hmap_node, hash); ++ } ++ } ++ ++ HMAP_FOR_EACH_POP (bfd_e, hmap_node, &sb_only) { ++ sbrec_bfd_delete(bfd_e->sb_bt); ++ free(bfd_e); ++ } ++ hmap_destroy(&sb_only); ++ ++ bitmap_free(bfd_src_ports); ++} ++ + /* Returns a string of the IP address of the router port 'op' that + * overlaps with 'ip_s". If one is not found, returns NULL. 
+ * +@@ -12444,6 +12629,7 @@ ovnnb_db_run(struct northd_context *ctx, + struct hmap igmp_groups; + struct shash meter_groups = SHASH_INITIALIZER(&meter_groups); + struct hmap lbs; ++ struct hmap bfd_connections = HMAP_INITIALIZER(&bfd_connections); + + /* Sync ipsec configuration. + * Copy nb_cfg from northbound to southbound database. +@@ -12538,6 +12724,7 @@ ovnnb_db_run(struct northd_context *ctx, + build_ip_mcast(ctx, datapaths); + build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); + build_meter_groups(ctx, &meter_groups); ++ build_bfd_table(ctx, &bfd_connections); + build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, + &igmp_groups, &meter_groups, &lbs); + ovn_update_ipv6_prefix(ports); +@@ -12563,9 +12750,13 @@ ovnnb_db_run(struct northd_context *ctx, + HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) { + ovn_port_group_destroy(&port_groups, pg); + } ++ ++ bfd_cleanup_connections(ctx, &bfd_connections); ++ + hmap_destroy(&igmp_groups); + hmap_destroy(&mcast_groups); + hmap_destroy(&port_groups); ++ hmap_destroy(&bfd_connections); + + struct shash_node *node, *next; + SHASH_FOR_EACH_SAFE (node, next, &meter_groups) { +@@ -13497,6 +13688,16 @@ main(int argc, char *argv[]) + add_column_noalert(ovnsb_idl_loop.idl, + &sbrec_load_balancer_col_external_ids); + ++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_bfd); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_min_tx); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_min_rx); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_detect_mult); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_disc); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_src_port); ++ + struct ovsdb_idl_index *sbrec_chassis_by_name + = 
chassis_index_create(ovnsb_idl_loop.idl); + +@@ -13619,6 +13820,7 @@ main(int argc, char *argv[]) + } + } + ++ + free(ovn_internal_version); + unixctl_server_destroy(unixctl); + ovsdb_idl_loop_destroy(&ovnnb_idl_loop); +diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema +index b77a2308c..aea932f55 100644 +--- a/ovn-nb.ovsschema ++++ b/ovn-nb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Northbound", +- "version": "5.30.0", +- "cksum": "3273824429 27172", ++ "version": "5.31.0", ++ "cksum": "1511492848 28473", + "tables": { + "NB_Global": { + "columns": { +@@ -526,5 +526,30 @@ + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "indexes": [["name"]], ++ "isRoot": true}, ++ "BFD": { ++ "columns": { ++ "logical_port": {"type": "string"}, ++ "dst_ip": {"type": "string"}, ++ "min_tx": {"type": {"key": {"type": "integer", ++ "minInteger": 1}, ++ "min": 0, "max": 1}}, ++ "min_rx": {"type": {"key": {"type": "integer"}, ++ "min": 0, "max": 1}}, ++ "detect_mult": {"type": {"key": {"type": "integer", ++ "minInteger": 1}, ++ "min": 0, "max": 1}}, ++ "status": { ++ "type": {"key": {"type": "string", ++ "enum": ["set", ["down", "init", "up", ++ "admin_down"]]}, ++ "min": 0, "max": 1}}, ++ "external_ids": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}, ++ "options": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}}, ++ "indexes": [["logical_port", "dst_ip"]], + "isRoot": true}} + } +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 0cf043790..cdc5e0f3a 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -3728,4 +3728,71 @@ + + + ++ ++ ++

    ++ Contains BFD parameters for ovn-controller BFD configuration. ++

    ++ ++ ++ ++ OVN logical port where the BFD engine is running. ++ ++ ++ ++ BFD peer IP address. ++ ++ ++ ++ This is the minimum interval, in milliseconds, that the local ++ system would like to use when transmitting BFD Control packets, ++ less any jitter applied. The value zero is reserved. Default ++ value is 1000 ms. ++ ++ ++ ++ This is the minimum interval, in milliseconds, between received ++ BFD Control packets that this system is capable of supporting, ++ less any jitter applied by the sender. If this value is zero, ++ the transmitting system does not want the remote system to send ++ any periodic BFD Control packets. Default value is 1000 ms. ++ ++ ++ ++ Detection time multiplier. The negotiated transmit interval, ++ multiplied by this value, provides the Detection Time for the ++ receiving system in Asynchronous mode. Default value is 5. ++ ++ ++ ++ Reserved for future use. ++ ++ ++ ++ See External IDs at the beginning of this document. ++ ++ ++ ++ ++ ++

    ++ BFD port logical states. Possible values are: ++

      ++
    • ++ admin_down ++
    • ++
    • ++ down ++
    • ++
    • ++ init ++
    • ++
    • ++ up ++
    • ++
    ++

    ++
    ++
    ++
    + +diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema +index 5228839b8..97db6de39 100644 +--- a/ovn-sb.ovsschema ++++ b/ovn-sb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Southbound", +- "version": "20.12.0", +- "cksum": "3969471120 24441", ++ "version": "20.13.0", ++ "cksum": "3035725595 25676", + "tables": { + "SB_Global": { + "columns": { +@@ -484,6 +484,29 @@ + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, ++ "isRoot": true}, ++ "BFD": { ++ "columns": { ++ "src_port": {"type": {"key": {"type": "integer", ++ "minInteger": 49152, ++ "maxInteger": 65535}}}, ++ "disc": {"type": {"key": {"type": "integer"}}}, ++ "logical_port": {"type": "string"}, ++ "dst_ip": {"type": "string"}, ++ "min_tx": {"type": {"key": {"type": "integer"}}}, ++ "min_rx": {"type": {"key": {"type": "integer"}}}, ++ "detect_mult": {"type": {"key": {"type": "integer"}}}, ++ "status": { ++ "type": {"key": {"type": "string", ++ "enum": ["set", ["down", "init", "up", ++ "admin_down"]]}}}, ++ "external_ids": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}, ++ "options": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}}, ++ "indexes": [["logical_port", "dst_ip", "src_port", "disc"]], + "isRoot": true} + } + } +diff --git a/ovn-sb.xml b/ovn-sb.xml +index c13994848..eb440e492 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -4231,4 +4231,82 @@ tcp.flags = RST; + + + ++ ++ ++

    ++ Contains BFD parameters for ovn-controller BFD configuration. ++

    ++ ++ ++ ++ UDP source port used in BFD control packets. ++ The source port MUST be in the range 49152 through 65535 ++ (RFC 5881 section 4). ++ ++ ++ ++ A unique, nonzero discriminator value generated by the transmitting ++ system, used to demultiplex multiple BFD sessions between the same pair ++ of systems. ++ ++ ++ ++ OVN logical port where the BFD engine is running. ++ ++ ++ ++ BFD peer IP address. ++ ++ ++ ++ This is the minimum interval, in milliseconds, that the local ++ system would like to use when transmitting BFD Control packets, ++ less any jitter applied. The value zero is reserved. ++ ++ ++ ++ This is the minimum interval, in milliseconds, between received ++ BFD Control packets that this system is capable of supporting, ++ less any jitter applied by the sender. If this value is zero, ++ the transmitting system does not want the remote system to send ++ any periodic BFD Control packets. ++ ++ ++ ++ Detection time multiplier. The negotiated transmit interval, ++ multiplied by this value, provides the Detection Time for the ++ receiving system in Asynchronous mode. ++ ++ ++ ++ Reserved for future use. ++ ++ ++ ++ See External IDs at the beginning of this document. ++ ++ ++ ++ ++ ++

    ++ BFD port logical states. Possible values are: ++

      ++
    • ++ admin_down ++
    • ++
    • ++ down ++
    • ++
    • ++ init ++
    • ++
    • ++ up ++
    • ++
    ++

    ++
    ++
    ++
    + +-- +2.29.2 + diff --git a/SOURCES/0013-action-introduce-handle_bfd_msg-action.patch b/SOURCES/0013-action-introduce-handle_bfd_msg-action.patch new file mode 100644 index 0000000..bf54a19 --- /dev/null +++ b/SOURCES/0013-action-introduce-handle_bfd_msg-action.patch @@ -0,0 +1,164 @@ +From 2d71cf47fdb194287719a97ee81dbb0dd9fab9d8 Mon Sep 17 00:00:00 2001 +Message-Id: <2d71cf47fdb194287719a97ee81dbb0dd9fab9d8.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Lorenzo Bianconi +Date: Fri, 8 Jan 2021 17:36:21 +0100 +Subject: [PATCH 13/16] action: introduce handle_bfd_msg() action. + +Add handle_bfd_msg() action to parse BFD packets received by the +controller. handle_bfd_msg() logic is currently empty and it will be +implemented adding BFD state machine in the following patches. + +Acked-by: Mark Michelson +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/pinctrl.c | 15 +++++++++++++++ + include/ovn/actions.h | 7 +++++++ + lib/actions.c | 27 +++++++++++++++++++++++++++ + tests/ovn.at | 4 ++++ + utilities/ovn-trace.c | 2 ++ + 5 files changed, 55 insertions(+) + +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index 9df6533a1..deeae7479 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -329,6 +329,9 @@ static void bfd_monitor_init(void); + static void bfd_monitor_destroy(void); + static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + OVS_REQUIRES(pinctrl_mutex); ++static void ++pinctrl_handle_bfd_msg(void) ++ OVS_REQUIRES(pinctrl_mutex); + static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, +@@ -2975,6 +2978,12 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + ovs_mutex_unlock(&pinctrl_mutex); + break; + ++ case ACTION_OPCODE_BFD_MSG: ++ ovs_mutex_lock(&pinctrl_mutex); ++ pinctrl_handle_bfd_msg(); ++ 
ovs_mutex_unlock(&pinctrl_mutex); ++ break; ++ + default: + VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32, + ntohl(ah->opcode)); +@@ -6503,6 +6512,12 @@ next: + } + } + ++static void ++pinctrl_handle_bfd_msg(void) ++ OVS_REQUIRES(pinctrl_mutex) ++{ ++} ++ + static void + bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, + struct ovsdb_idl_index *sbrec_port_binding_by_name, +diff --git a/include/ovn/actions.h b/include/ovn/actions.h +index 9c1ebf4aa..d104d4d64 100644 +--- a/include/ovn/actions.h ++++ b/include/ovn/actions.h +@@ -105,6 +105,7 @@ struct ovn_extend_table; + OVNACT(CHK_LB_HAIRPIN, ovnact_result) \ + OVNACT(CHK_LB_HAIRPIN_REPLY, ovnact_result) \ + OVNACT(CT_SNAT_TO_VIP, ovnact_null) \ ++ OVNACT(BFD_MSG, ovnact_null) \ + + /* enum ovnact_type, with a member OVNACT_ for each action. */ + enum OVS_PACKED_ENUM ovnact_type { +@@ -627,6 +628,12 @@ enum action_opcode { + * The actions, in OpenFlow 1.3 format, follow the action_header. + */ + ACTION_OPCODE_REJECT, ++ ++ /* handle_bfd_msg { ...actions ...}." ++ * ++ * The actions, in OpenFlow 1.3 format, follow the action_header. ++ */ ++ ACTION_OPCODE_BFD_MSG, + }; + + /* Header. 
*/ +diff --git a/lib/actions.c b/lib/actions.c +index fbaeb34bc..86be97f44 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -2742,6 +2742,31 @@ encode_DHCP6_REPLY(const struct ovnact_null *a OVS_UNUSED, + encode_controller_op(ACTION_OPCODE_DHCP6_SERVER, ofpacts); + } + ++static void ++format_BFD_MSG(const struct ovnact_null *a OVS_UNUSED, struct ds *s) ++{ ++ ds_put_cstr(s, "handle_bfd_msg();"); ++} ++ ++static void ++encode_BFD_MSG(const struct ovnact_null *a OVS_UNUSED, ++ const struct ovnact_encode_params *ep OVS_UNUSED, ++ struct ofpbuf *ofpacts) ++{ ++ encode_controller_op(ACTION_OPCODE_BFD_MSG, ofpacts); ++} ++ ++static void ++parse_handle_bfd_msg(struct action_context *ctx OVS_UNUSED) ++{ ++ if (!lexer_force_match(ctx->lexer, LEX_T_LPAREN)) { ++ return; ++ } ++ ++ ovnact_put_BFD_MSG(ctx->ovnacts); ++ lexer_force_match(ctx->lexer, LEX_T_RPAREN); ++} ++ + static void + parse_SET_QUEUE(struct action_context *ctx) + { +@@ -3842,6 +3867,8 @@ parse_action(struct action_context *ctx) + parse_fwd_group_action(ctx); + } else if (lexer_match_id(ctx->lexer, "handle_dhcpv6_reply")) { + ovnact_put_DHCP6_REPLY(ctx->ovnacts); ++ } else if (lexer_match_id(ctx->lexer, "handle_bfd_msg")) { ++ parse_handle_bfd_msg(ctx); + } else if (lexer_match_id(ctx->lexer, "reject")) { + parse_REJECT(ctx); + } else if (lexer_match_id(ctx->lexer, "ct_snat_to_vip")) { +diff --git a/tests/ovn.at b/tests/ovn.at +index ce6db8677..27cb2e410 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1807,6 +1807,10 @@ ct_snat_to_vip; + ct_snat_to_vip(foo); + Syntax error at `(' expecting `;'. + ++# bfd packets ++handle_bfd_msg(); ++ encodes as controller(userdata=00.00.00.17.00.00.00.00) ++ + # Miscellaneous negative tests. + ; + Syntax error at `;'. 
+diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c +index 465049d34..e3aa73fb7 100644 +--- a/utilities/ovn-trace.c ++++ b/utilities/ovn-trace.c +@@ -2544,6 +2544,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, + break; + case OVNACT_DHCP6_REPLY: + break; ++ case OVNACT_BFD_MSG: ++ break; + } + } + ds_destroy(&s); +-- +2.29.2 + diff --git a/SOURCES/0014-controller-bfd-introduce-BFD-state-machine.patch b/SOURCES/0014-controller-bfd-introduce-BFD-state-machine.patch new file mode 100644 index 0000000..3a932e7 --- /dev/null +++ b/SOURCES/0014-controller-bfd-introduce-BFD-state-machine.patch @@ -0,0 +1,751 @@ +From e75d53c69261a0b104c75d8f6f7dc7175a690833 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Lorenzo Bianconi +Date: Fri, 8 Jan 2021 17:36:22 +0100 +Subject: [PATCH 14/16] controller: bfd: introduce BFD state machine. + +Introduce BFD state machine for BFD packet parsing +according to RFC880 https://tools.ietf.org/html/rfc5880. +Introduce BFD logical flows in ovn-northd. + +Change-Id: I1ea057ad45393360fa917eb6e3a576dd37cfbc0d +Acked-by: Mark Michelson +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + NEWS | 6 + + controller/pinctrl.c | 342 ++++++++++++++++++++++++++++++++++++++-- + northd/ovn-northd.8.xml | 21 +++ + northd/ovn-northd.c | 85 +++++++++- + tests/ovn-northd.at | 55 +++++++ + 5 files changed, 488 insertions(+), 21 deletions(-) + +diff --git a/NEWS b/NEWS +index f71ec329c..85f63503e 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,3 +1,9 @@ ++Post-v20.12.0 ++------------------------- ++ - Support ECMP multiple nexthops for reroute router policies. ++ - BFD protocol support according to RFC880 [0]. IPv6 is not suported yet. 
++ [0] https://tools.ietf.org/html/rfc5880) ++ + OVN v20.12.0 - 18 Dec 2020 + -------------------------- + - The "datapath" argument to ovn-trace is now optional, since the +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index deeae7479..6e363a0f9 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -330,9 +330,10 @@ static void bfd_monitor_destroy(void); + static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + OVS_REQUIRES(pinctrl_mutex); + static void +-pinctrl_handle_bfd_msg(void) ++pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) + OVS_REQUIRES(pinctrl_mutex); +-static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, ++static void bfd_monitor_run(struct ovsdb_idl_txn *ovnsb_idl_txn, ++ const struct sbrec_bfd_table *bfd_table, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, + const struct sset *active_tunnels) +@@ -2980,7 +2981,7 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + + case ACTION_OPCODE_BFD_MSG: + ovs_mutex_lock(&pinctrl_mutex); +- pinctrl_handle_bfd_msg(); ++ pinctrl_handle_bfd_msg(&headers, &packet); + ovs_mutex_unlock(&pinctrl_mutex); + break; + +@@ -3206,10 +3207,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + local_datapaths); + sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, + chassis); +- if (ovnsb_idl_txn) { +- bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis, +- active_tunnels); +- } ++ bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name, ++ chassis, active_tunnels); + ovs_mutex_unlock(&pinctrl_mutex); + } + +@@ -6345,8 +6344,48 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, + + } + ++enum bfd_state { ++ BFD_STATE_ADMIN_DOWN, ++ BFD_STATE_DOWN, ++ BFD_STATE_INIT, ++ BFD_STATE_UP, ++}; ++ ++enum bfd_flags { ++ BFD_FLAG_MULTIPOINT = 1 << 0, ++ BFD_FLAG_DEMAND = 1 << 1, ++ BFD_FLAG_AUTH = 1 << 2, ++ 
BFD_FLAG_CTL = 1 << 3, ++ BFD_FLAG_FINAL = 1 << 4, ++ BFD_FLAG_POLL = 1 << 5 ++}; ++ ++#define BFD_FLAGS_MASK 0x3f ++ ++static char * ++bfd_get_status(enum bfd_state state) ++{ ++ switch (state) { ++ case BFD_STATE_ADMIN_DOWN: ++ return "admin_down"; ++ case BFD_STATE_DOWN: ++ return "down"; ++ case BFD_STATE_INIT: ++ return "init"; ++ case BFD_STATE_UP: ++ return "up"; ++ default: ++ return ""; ++ } ++} ++ + static struct hmap bfd_monitor_map; + ++#define BFD_UPDATE_BATCH_TH 10 ++static uint16_t bfd_pending_update; ++#define BFD_UPDATE_TIMEOUT 5000LL ++static long long bfd_last_update; ++ + struct bfd_entry { + struct hmap_node node; + bool erase; +@@ -6365,11 +6404,23 @@ struct bfd_entry { + * sessions on the system + */ + uint16_t udp_src; +- ovs_be32 disc; ++ ovs_be32 local_disc; ++ ovs_be32 remote_disc; ++ ++ uint32_t local_min_tx; ++ uint32_t local_min_rx; ++ uint32_t remote_min_rx; ++ ++ uint8_t local_mult; + + int64_t port_key; + int64_t metadata; + ++ enum bfd_state state; ++ bool change_state; ++ ++ uint32_t detection_timeout; ++ long long int last_rx; + long long int next_tx; + }; + +@@ -6377,6 +6428,7 @@ static void + bfd_monitor_init(void) + { + hmap_init(&bfd_monitor_map); ++ bfd_last_update = time_msec(); + } + + static void +@@ -6402,6 +6454,24 @@ pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) + return NULL; + } + ++static struct bfd_entry * ++pinctrl_find_bfd_monitor_entry_by_disc(ovs_be32 ip, ovs_be32 disc) ++{ ++ char *ip_src = xasprintf(IP_FMT, IP_ARGS(ip)); ++ struct bfd_entry *ret = NULL, *entry; ++ ++ HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip_src, 0), ++ &bfd_monitor_map) { ++ if (entry->local_disc == disc) { ++ ret = entry; ++ break; ++ } ++ } ++ ++ free(ip_src); ++ return ret; ++} ++ + static bool + bfd_monitor_should_inject(void) + { +@@ -6453,9 +6523,60 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) + udp->udp_dst = htons(BFD_DEST_PORT); + udp->udp_len = htons(sizeof *udp + sizeof 
*msg); + +- msg = dp_packet_put_uninit(packet, sizeof *msg); ++ msg = dp_packet_put_zeros(packet, sizeof *msg); + msg->vers_diag = (BFD_VERSION << 5); ++ msg->mult = entry->local_mult; + msg->length = BFD_PACKET_LEN; ++ msg->flags = entry->state << 6; ++ msg->my_disc = entry->local_disc; ++ msg->your_disc = entry->remote_disc; ++ /* min_tx and min_rx are in us - RFC 5880 page 9 */ ++ msg->min_tx = htonl(entry->local_min_tx * 1000); ++ msg->min_rx = htonl(entry->local_min_rx * 1000); ++} ++ ++static bool ++bfd_monitor_need_update(void) ++{ ++ long long int cur_time = time_msec(); ++ ++ if (bfd_pending_update == BFD_UPDATE_BATCH_TH) { ++ goto update; ++ } ++ ++ if (bfd_pending_update && ++ bfd_last_update + BFD_UPDATE_TIMEOUT < cur_time) { ++ goto update; ++ } ++ return false; ++ ++update: ++ bfd_last_update = cur_time; ++ bfd_pending_update = 0; ++ return true; ++} ++ ++static void ++bfd_check_detection_timeout(struct bfd_entry *entry) ++{ ++ if (entry->state == BFD_STATE_ADMIN_DOWN) { ++ return; ++ } ++ ++ if (!entry->detection_timeout) { ++ return; ++ } ++ ++ long long int cur_time = time_msec(); ++ if (cur_time < entry->last_rx + entry->detection_timeout) { ++ return; ++ } ++ ++ entry->state = BFD_STATE_DOWN; ++ entry->change_state = true; ++ bfd_last_update = cur_time; ++ bfd_pending_update = 0; ++ notify_pinctrl_main(); + } + + static void +@@ -6465,11 +6586,27 @@ bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + long long int cur_time = time_msec(); + struct bfd_entry *entry; + ++ if (bfd_monitor_need_update()) { ++ notify_pinctrl_main(); ++ } ++ + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { ++ unsigned long tx_timeout; ++ ++ bfd_check_detection_timeout(entry); ++ + if (cur_time < entry->next_tx) { + goto next; + } + ++ if (!entry->remote_min_rx) { ++ continue; ++ } ++ ++ if (entry->state == BFD_STATE_ADMIN_DOWN) { ++ continue; ++ } ++ + uint64_t packet_stub[256 / 8]; + struct dp_packet packet; + dp_packet_use_stub(&packet, packet_stub, 
sizeof packet_stub); +@@ -6504,7 +6641,9 @@ bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + dp_packet_uninit(&packet); + ofpbuf_uninit(&ofpacts); + +- entry->next_tx = cur_time + 5000; ++ tx_timeout = MAX(entry->local_min_tx, entry->remote_min_rx); ++ tx_timeout -= random_range((tx_timeout * 25) / 100); ++ entry->next_tx = cur_time + tx_timeout; + next: + if (*bfd_time > entry->next_tx) { + *bfd_time = entry->next_tx; +@@ -6512,14 +6651,167 @@ next: + } + } + ++static bool ++pinctrl_check_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) ++{ ++ if (ip_flow->dl_type != htons(ETH_TYPE_IP) && ++ ip_flow->dl_type != htons(ETH_TYPE_IPV6)) { ++ return false; ++ } ++ ++ if (ip_flow->nw_proto != IPPROTO_UDP) { ++ return false; ++ } ++ ++ struct udp_header *udp_hdr = dp_packet_l4(pkt_in); ++ if (udp_hdr->udp_dst != htons(BFD_DEST_PORT)) { ++ return false; ++ } ++ ++ const struct bfd_msg *msg = dp_packet_get_udp_payload(pkt_in); ++ uint8_t version = msg->vers_diag >> 5; ++ if (version != BFD_VERSION) { ++ return false; ++ } ++ ++ enum bfd_flags flags = msg->flags & BFD_FLAGS_MASK; ++ if (flags & BFD_FLAG_AUTH) { ++ /* AUTH not supported yet */ ++ return false; ++ } ++ ++ if (msg->length < BFD_PACKET_LEN) { ++ return false; ++ } ++ ++ if (!msg->mult) { ++ return false; ++ } ++ ++ if (flags & BFD_FLAG_MULTIPOINT) { ++ return false; ++ } ++ ++ if (!msg->my_disc) { ++ return false; ++ } ++ ++ enum bfd_state peer_state = msg->flags >> 6; ++ if (peer_state >= BFD_STATE_INIT && !msg->your_disc) { ++ return false; ++ } ++ ++ return true; ++} ++ + static void +-pinctrl_handle_bfd_msg(void) ++pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) + OVS_REQUIRES(pinctrl_mutex) + { ++ if (!pinctrl_check_bfd_msg(ip_flow, pkt_in)) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ++ VLOG_WARN_RL(&rl, "BFD packet discarded"); ++ return; ++ } ++ ++ const struct bfd_msg *msg = dp_packet_get_udp_payload(pkt_in); ++ 
struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_disc( ++ ip_flow->nw_src, msg->your_disc); ++ if (!entry) { ++ return; ++ } ++ ++ bool change_state = false; ++ entry->remote_disc = msg->my_disc; ++ uint32_t remote_min_tx = ntohl(msg->min_tx) / 1000; ++ entry->remote_min_rx = ntohl(msg->min_rx) / 1000; ++ entry->detection_timeout = msg->mult * MAX(remote_min_tx, ++ entry->local_min_rx); ++ ++ enum bfd_state peer_state = msg->flags >> 6; ++ if (peer_state == BFD_STATE_ADMIN_DOWN && ++ entry->state >= BFD_STATE_INIT) { ++ entry->state = BFD_STATE_DOWN; ++ entry->last_rx = time_msec(); ++ change_state = true; ++ goto out; ++ } ++ ++ /* bfd state machine */ ++ switch (entry->state) { ++ case BFD_STATE_DOWN: ++ if (peer_state == BFD_STATE_DOWN) { ++ entry->state = BFD_STATE_INIT; ++ change_state = true; ++ } ++ if (peer_state == BFD_STATE_INIT) { ++ entry->state = BFD_STATE_UP; ++ change_state = true; ++ } ++ entry->last_rx = time_msec(); ++ break; ++ case BFD_STATE_INIT: ++ if (peer_state == BFD_STATE_INIT || ++ peer_state == BFD_STATE_UP) { ++ entry->state = BFD_STATE_UP; ++ change_state = true; ++ } ++ if (peer_state == BFD_STATE_ADMIN_DOWN) { ++ entry->state = BFD_STATE_DOWN; ++ change_state = true; ++ } ++ entry->last_rx = time_msec(); ++ break; ++ case BFD_STATE_UP: ++ if (peer_state == BFD_STATE_ADMIN_DOWN || ++ peer_state == BFD_STATE_DOWN) { ++ entry->state = BFD_STATE_DOWN; ++ change_state = true; ++ } ++ entry->last_rx = time_msec(); ++ break; ++ case BFD_STATE_ADMIN_DOWN: ++ default: ++ break; ++ } ++ ++out: ++ /* let's try to bacth db updates */ ++ if (change_state) { ++ entry->change_state = true; ++ bfd_pending_update++; ++ } ++ if (bfd_monitor_need_update()) { ++ notify_pinctrl_main(); ++ } ++} ++ ++static void ++bfd_monitor_check_sb_conf(const struct sbrec_bfd *sb_bt, ++ struct bfd_entry *entry) ++{ ++ ovs_be32 ip_dst; ++ ++ if (ip_parse(sb_bt->dst_ip, &ip_dst) && ip_dst != entry->ip_dst) { ++ entry->ip_dst = ip_dst; ++ } ++ ++ if 
(sb_bt->min_tx != entry->local_min_tx) { ++ entry->local_min_tx = sb_bt->min_tx; ++ } ++ ++ if (sb_bt->min_rx != entry->local_min_rx) { ++ entry->local_min_rx = sb_bt->min_rx; ++ } ++ ++ if (sb_bt->detect_mult != entry->local_mult) { ++ entry->local_mult = sb_bt->detect_mult; ++ } + } + + static void +-bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, ++bfd_monitor_run(struct ovsdb_idl_txn *ovnsb_idl_txn, ++ const struct sbrec_bfd_table *bfd_table, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_chassis *chassis, + const struct sset *active_tunnels) +@@ -6599,15 +6891,39 @@ bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, + entry->ip_src = ip_src; + entry->ip_dst = ip_dst; + entry->udp_src = bt->src_port; +- entry->disc = htonl(bt->disc); ++ entry->local_disc = htonl(bt->disc); + entry->next_tx = cur_time; ++ entry->last_rx = cur_time; ++ entry->detection_timeout = 30000; + entry->metadata = pb->datapath->tunnel_key; + entry->port_key = pb->tunnel_key; ++ entry->state = BFD_STATE_ADMIN_DOWN; ++ entry->local_min_tx = bt->min_tx; ++ entry->local_min_rx = bt->min_rx; ++ entry->remote_min_rx = 1; /* RFC5880 page 29 */ ++ entry->local_mult = bt->detect_mult; + + uint32_t hash = hash_string(bt->dst_ip, 0); + hmap_insert(&bfd_monitor_map, &entry->node, hash); ++ } else if (!strcmp(bt->status, "admin_down") && ++ entry->state != BFD_STATE_ADMIN_DOWN) { ++ entry->state = BFD_STATE_ADMIN_DOWN; ++ entry->change_state = false; ++ entry->remote_disc = 0; ++ } else if (strcmp(bt->status, "admin_down") && ++ entry->state == BFD_STATE_ADMIN_DOWN) { ++ entry->state = BFD_STATE_DOWN; ++ entry->change_state = false; ++ entry->remote_disc = 0; + changed = true; ++ } else if (entry->change_state && ovnsb_idl_txn) { ++ if (entry->state == BFD_STATE_DOWN) { ++ entry->remote_disc = 0; ++ } ++ sbrec_bfd_set_status(bt, bfd_get_status(entry->state)); ++ entry->change_state = false; + } ++ bfd_monitor_check_sb_conf(bt, entry); + entry->erase = false; 
+ } + +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index 1f0f71f34..48c52a56a 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -1936,6 +1936,27 @@ next; +

    + + ++
  • ++

    ++ For each BFD port the two following priority-110 flows are added ++ to manage BFD traffic: ++ ++

      ++
    • ++ if ip4.src or ip6.src is any IP ++ address owned by the router port and udp.dst == 3784 ++ , the packet is advanced to the next pipeline stage. ++
    • ++ ++
    • ++ if ip4.dst or ip6.dst is any IP ++ address owned by the router port and udp.dst == 3784 ++ , the handle_bfd_msg action is executed. ++
    • ++
    ++

    ++
  • ++ +
  • +

    + L3 admission control: A priority-100 flow drops packets that match +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 77ea2181c..363bb0895 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -1473,6 +1473,8 @@ struct ovn_port { + + bool has_unknown; /* If the addresses have 'unknown' defined. */ + ++ bool has_bfd; ++ + /* The port's peer: + * + * - A switch port S of type "router" has a router port R as a peer, +@@ -7597,7 +7599,8 @@ static int bfd_get_unused_port(unsigned long *bfd_src_ports) + } + + static void +-build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections) ++build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections, ++ struct hmap *ports) + { + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); + const struct sbrec_bfd *sb_bt; +@@ -7661,9 +7664,18 @@ build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections) + hash = hash_string(bfd_e->sb_bt->logical_port, hash); + hmap_insert(bfd_connections, &bfd_e->hmap_node, hash); + } ++ ++ struct ovn_port *op = ovn_port_find(ports, nb_bt->logical_port); ++ if (op) { ++ op->has_bfd = true; ++ } + } + + HMAP_FOR_EACH_POP (bfd_e, hmap_node, &sb_only) { ++ struct ovn_port *op = ovn_port_find(ports, bfd_e->sb_bt->logical_port); ++ if (op) { ++ op->has_bfd = false; ++ } + sbrec_bfd_delete(bfd_e->sb_bt); + free(bfd_e); + } +@@ -8423,16 +8435,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op, + build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4, + &match, &priority); + +- struct ds actions = DS_EMPTY_INITIALIZER; +- ds_put_format(&actions, "ip.ttl--; "REG_ECMP_GROUP_ID" = 0; %s = ", ++ struct ds common_actions = DS_EMPTY_INITIALIZER; ++ ds_put_format(&common_actions, REG_ECMP_GROUP_ID" = 0; %s = ", + is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6); +- + if (gateway) { +- ds_put_cstr(&actions, gateway); ++ ds_put_cstr(&common_actions, gateway); + } else { +- ds_put_format(&actions, "ip%s.dst", is_ipv4 ? 
"4" : "6"); ++ ds_put_format(&common_actions, "ip%s.dst", is_ipv4 ? "4" : "6"); + } +- ds_put_format(&actions, "; " ++ ds_put_format(&common_actions, "; " + "%s = %s; " + "eth.src = %s; " + "outport = %s; " +@@ -8442,11 +8453,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op, + lrp_addr_s, + op->lrp_networks.ea_s, + op->json_key); ++ struct ds actions = DS_EMPTY_INITIALIZER; ++ ds_put_format(&actions, "ip.ttl--; %s", ds_cstr(&common_actions)); + + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority, + ds_cstr(&match), ds_cstr(&actions), + stage_hint); ++ if (op->has_bfd) { ++ ds_put_format(&match, " && udp.dst == 3784"); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, ++ priority + 1, ds_cstr(&match), ++ ds_cstr(&common_actions), stage_hint); ++ } + ds_destroy(&match); ++ ds_destroy(&common_actions); + ds_destroy(&actions); + } + +@@ -9108,6 +9128,52 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, + ds_destroy(&actions); + } + ++static void ++build_lrouter_bfd_flows(struct hmap *lflows, struct ovn_port *op) ++{ ++ if (!op->has_bfd) { ++ return; ++ } ++ ++ struct ds ip_list = DS_EMPTY_INITIALIZER; ++ struct ds match = DS_EMPTY_INITIALIZER; ++ ++ if (op->lrp_networks.n_ipv4_addrs) { ++ op_put_v4_networks(&ip_list, op, false); ++ ds_put_format(&match, "ip4.src == %s && udp.dst == 3784", ++ ds_cstr(&ip_list)); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, ++ ds_cstr(&match), "next; ", ++ &op->nbrp->header_); ++ ds_clear(&match); ++ ds_put_format(&match, "ip4.dst == %s && udp.dst == 3784", ++ ds_cstr(&ip_list)); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, ++ ds_cstr(&match), "handle_bfd_msg(); ", ++ &op->nbrp->header_); ++ } ++ if (op->lrp_networks.n_ipv6_addrs) { ++ ds_clear(&ip_list); ++ ds_clear(&match); ++ ++ op_put_v6_networks(&ip_list, op); ++ ds_put_format(&match, "ip6.src == %s && udp.dst == 3784", ++ ds_cstr(&ip_list)); ++ 
ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, ++ ds_cstr(&match), "next; ", ++ &op->nbrp->header_); ++ ds_clear(&match); ++ ds_put_format(&match, "ip6.dst == %s && udp.dst == 3784", ++ ds_cstr(&ip_list)); ++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, ++ ds_cstr(&match), "handle_bfd_msg(); ", ++ &op->nbrp->header_); ++ } ++ ++ ds_destroy(&ip_list); ++ ds_destroy(&match); ++} ++ + /* Logical router ingress Table 0: L2 Admission Control + * Generic admission control flows (without inport check). + */ +@@ -10614,6 +10680,9 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, + &op->nbrp->header_); + } + ++ /* BFD msg handling */ ++ build_lrouter_bfd_flows(lflows, op); ++ + /* ICMP time exceeded */ + for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { + ds_clear(match); +@@ -12724,7 +12793,7 @@ ovnnb_db_run(struct northd_context *ctx, + build_ip_mcast(ctx, datapaths); + build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); + build_meter_groups(ctx, &meter_groups); +- build_bfd_table(ctx, &bfd_connections); ++ build_bfd_table(ctx, &bfd_connections, ports); + build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, + &igmp_groups, &meter_groups, &lbs); + ovn_update_ipv6_prefix(ports); +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index ce6c44db4..eee004328 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2322,3 +2322,58 @@ sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], + ]) + + AT_CLEANUP ++ ++AT_SETUP([ovn -- check BFD config propagation to SBDB]) ++AT_KEYWORDS([northd-bfd]) ++ovn_start ++ ++check ovn-nbctl --wait=sb lr-add r0 ++for i in $(seq 1 4); do ++ check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24 ++ check ovn-nbctl --wait=sb ls-add sw$i ++ check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0 ++ check ovn-nbctl --wait=sb lsp-set-type sw$i-r0 router ++ check ovn-nbctl --wait=sb lsp-set-options sw$i-r0 
router-port=r0-sw$i ++ check ovn-nbctl --wait=sb lsp-set-addresses sw$i-r0 00:00:00:00:00:0$i ++done ++ ++uuid=$(ovn-nbctl create bfd logical_port=r0-sw1 dst_ip=192.168.10.2 status=down min_tx=250 min_rx=250 detect_mult=10) ++ovn-nbctl create bfd logical_port=r0-sw2 dst_ip=192.168.20.2 status=down min_tx=500 min_rx=500 detect_mult=20 ++ovn-nbctl create bfd logical_port=r0-sw3 dst_ip=192.168.30.2 status=down ++ovn-nbctl create bfd logical_port=r0-sw4 dst_ip=192.168.40.2 status=down min_tx=0 detect_mult=0 ++ ++check_column 10 bfd detect_mult logical_port=r0-sw1 ++check_column "192.168.10.2" bfd dst_ip logical_port=r0-sw1 ++check_column 250 bfd min_rx logical_port=r0-sw1 ++check_column 250 bfd min_tx logical_port=r0-sw1 ++check_column admin_down bfd status logical_port=r0-sw1 ++ ++check_column 20 bfd detect_mult logical_port=r0-sw2 ++check_column "192.168.20.2" bfd dst_ip logical_port=r0-sw2 ++check_column 500 bfd min_rx logical_port=r0-sw2 ++check_column 500 bfd min_tx logical_port=r0-sw2 ++check_column admin_down bfd status logical_port=r0-sw2 ++ ++check_column 5 bfd detect_mult logical_port=r0-sw3 ++check_column "192.168.30.2" bfd dst_ip logical_port=r0-sw3 ++check_column 1000 bfd min_rx logical_port=r0-sw3 ++check_column 1000 bfd min_tx logical_port=r0-sw3 ++check_column admin_down bfd status logical_port=r0-sw3 ++ ++uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw1) ++check ovn-nbctl set bfd $uuid min_tx=1000 ++check ovn-nbctl set bfd $uuid min_rx=1000 ++check ovn-nbctl set bfd $uuid detect_mult=100 ++ ++uuid_2=$(fetch_column nb:bfd _uuid logical_port=r0-sw2) ++check ovn-nbctl clear bfd $uuid_2 min_rx ++check_column 1000 bfd min_rx logical_port=r0-sw2 ++ ++check_column 1000 bfd min_tx logical_port=r0-sw1 ++check_column 1000 bfd min_rx logical_port=r0-sw1 ++check_column 100 bfd detect_mult logical_port=r0-sw1 ++ ++ovn-nbctl destroy bfd $uuid ++check_row_count bfd 2 ++ ++AT_CLEANUP +-- +2.29.2 + diff --git a/SOURCES/0015-bfd-support-demand-mode-on-rx-side.patch 
b/SOURCES/0015-bfd-support-demand-mode-on-rx-side.patch new file mode 100644 index 0000000..9765cbb --- /dev/null +++ b/SOURCES/0015-bfd-support-demand-mode-on-rx-side.patch @@ -0,0 +1,202 @@ +From a3a3062985cadc2f2193b10ccb3404d587028c61 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Lorenzo Bianconi +Date: Fri, 8 Jan 2021 17:36:23 +0100 +Subject: [PATCH 15/16] bfd: support demand mode on rx side. + +Introduce rx demand mode support according to RFC5880 [0]. +Demand mode on tx side is not supported yet. + +https://tools.ietf.org/html/rfc5880 +Acked-by: Mark Michelson +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/pinctrl.c | 105 ++++++++++++++++++++++++++++--------------- + 1 file changed, 68 insertions(+), 37 deletions(-) + +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index 6e363a0f9..5820ab659 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -330,7 +330,8 @@ static void bfd_monitor_destroy(void); + static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + OVS_REQUIRES(pinctrl_mutex); + static void +-pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) ++pinctrl_handle_bfd_msg(struct rconn *swconn, const struct flow *ip_flow, ++ struct dp_packet *pkt_in) + OVS_REQUIRES(pinctrl_mutex); + static void bfd_monitor_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + const struct sbrec_bfd_table *bfd_table, +@@ -2981,7 +2982,7 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + + case ACTION_OPCODE_BFD_MSG: + ovs_mutex_lock(&pinctrl_mutex); +- pinctrl_handle_bfd_msg(&headers, &packet); ++ pinctrl_handle_bfd_msg(swconn, &headers, &packet); + ovs_mutex_unlock(&pinctrl_mutex); + break; + +@@ -6411,6 +6412,8 @@ struct bfd_entry { + uint32_t local_min_rx; + uint32_t remote_min_rx; + ++ bool remote_demand_mode; ++ + uint8_t local_mult; + + int64_t port_key; +@@ -6495,7 +6498,8 @@ bfd_monitor_wait(long long int timeout) + } 
+ + static void +-bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) ++bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet, ++ bool final) + { + struct udp_header *udp; + struct bfd_msg *msg; +@@ -6527,7 +6531,8 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) + msg->vers_diag = (BFD_VERSION << 5); + msg->mult = entry->local_mult; + msg->length = BFD_PACKET_LEN; +- msg->flags = entry->state << 6; ++ msg->flags = final ? BFD_FLAG_FINAL : 0; ++ msg->flags |= entry->state << 6; + msg->my_disc = entry->local_disc; + msg->your_disc = entry->remote_disc; + /* min_tx and min_rx are in us - RFC 5880 page 9 */ +@@ -6535,6 +6540,46 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) + msg->min_rx = htonl(entry->local_min_rx * 1000); + } + ++static void ++pinctrl_send_bfd_tx_msg(struct rconn *swconn, struct bfd_entry *entry, ++ bool final) ++{ ++ uint64_t packet_stub[256 / 8]; ++ struct dp_packet packet; ++ dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); ++ bfd_monitor_put_bfd_msg(entry, &packet, final); ++ ++ uint64_t ofpacts_stub[4096 / 8]; ++ struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); ++ ++ /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. 
*/ ++ uint32_t dp_key = entry->metadata; ++ uint32_t port_key = entry->port_key; ++ put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); ++ put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); ++ put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); ++ struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); ++ resubmit->in_port = OFPP_CONTROLLER; ++ resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; ++ ++ struct ofputil_packet_out po = { ++ .packet = dp_packet_data(&packet), ++ .packet_len = dp_packet_size(&packet), ++ .buffer_id = UINT32_MAX, ++ .ofpacts = ofpacts.data, ++ .ofpacts_len = ofpacts.size, ++ }; ++ ++ match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); ++ enum ofp_version version = rconn_get_version(swconn); ++ enum ofputil_protocol proto = ++ ofputil_protocol_from_ofp_version(version); ++ queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); ++ dp_packet_uninit(&packet); ++ ofpbuf_uninit(&ofpacts); ++} ++ ++ + static bool + bfd_monitor_need_update(void) + { +@@ -6607,39 +6652,11 @@ bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) + continue; + } + +- uint64_t packet_stub[256 / 8]; +- struct dp_packet packet; +- dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); +- bfd_monitor_put_bfd_msg(entry, &packet); +- +- uint64_t ofpacts_stub[4096 / 8]; +- struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); +- +- /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. 
*/ +- uint32_t dp_key = entry->metadata; +- uint32_t port_key = entry->port_key; +- put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); +- put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); +- put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); +- struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); +- resubmit->in_port = OFPP_CONTROLLER; +- resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; +- +- struct ofputil_packet_out po = { +- .packet = dp_packet_data(&packet), +- .packet_len = dp_packet_size(&packet), +- .buffer_id = UINT32_MAX, +- .ofpacts = ofpacts.data, +- .ofpacts_len = ofpacts.size, +- }; ++ if (entry->remote_demand_mode) { ++ continue; ++ } + +- match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); +- enum ofp_version version = rconn_get_version(swconn); +- enum ofputil_protocol proto = +- ofputil_protocol_from_ofp_version(version); +- queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); +- dp_packet_uninit(&packet); +- ofpbuf_uninit(&ofpacts); ++ pinctrl_send_bfd_tx_msg(swconn, entry, false); + + tx_timeout = MAX(entry->local_min_tx, entry->remote_min_rx); + tx_timeout -= random_range((tx_timeout * 25) / 100); +@@ -6696,6 +6713,10 @@ pinctrl_check_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) + return false; + } + ++ if ((flags & BFD_FLAG_FINAL) && (flags & BFD_FLAG_POLL)) { ++ return false; ++ } ++ + enum bfd_state peer_state = msg->flags >> 6; + if (peer_state >= BFD_STATE_INIT && !msg->your_disc) { + return false; +@@ -6705,7 +6726,8 @@ pinctrl_check_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) + } + + static void +-pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) ++pinctrl_handle_bfd_msg(struct rconn *swconn, const struct flow *ip_flow, ++ struct dp_packet *pkt_in) + OVS_REQUIRES(pinctrl_mutex) + { + if (!pinctrl_check_bfd_msg(ip_flow, pkt_in)) { +@@ -6775,6 +6797,15 @@ pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) + break; + 
} + ++ if (entry->state == BFD_STATE_UP && ++ (msg->flags & BFD_FLAG_DEMAND)) { ++ entry->remote_demand_mode = true; ++ } ++ ++ if (msg->flags & BFD_FLAG_POLL) { ++ pinctrl_send_bfd_tx_msg(swconn, entry, true); ++ } ++ + out: + /* let's try to bacth db updates */ + if (change_state) { +-- +2.29.2 + diff --git a/SOURCES/0016-ovn-integrate-bfd-for-static-routes.patch b/SOURCES/0016-ovn-integrate-bfd-for-static-routes.patch new file mode 100644 index 0000000..7fae1d5 --- /dev/null +++ b/SOURCES/0016-ovn-integrate-bfd-for-static-routes.patch @@ -0,0 +1,407 @@ +From 986137dc1d4dc6905a7c5ab5e279856260966e12 Mon Sep 17 00:00:00 2001 +Message-Id: <986137dc1d4dc6905a7c5ab5e279856260966e12.1610458802.git.lorenzo.bianconi@redhat.com> +In-Reply-To: +References: +From: Lorenzo Bianconi +Date: Fri, 8 Jan 2021 17:36:24 +0100 +Subject: [PATCH 16/16] ovn: integrate bfd for static routes. + +Introduce the bfd reference in logical_router_static_router table +in order to check if the next-hop is properly running using the BFD +protocol. The CMS is supposed to populate bfd column with the proper +reference otherwise the BFD status is set to admin_down. +Add BFD tests in system-ovn.at. + +Acked-by: Mark Michelson +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + NEWS | 3 +- + northd/ovn-northd.c | 45 +++++++++++---- + ovn-nb.ovsschema | 6 +- + ovn-nb.xml | 7 +++ + tests/atlocal.in | 3 + + tests/ovn-nbctl.at | 8 ++- + tests/ovn-northd.at | 8 +++ + tests/system-ovn.at | 136 ++++++++++++++++++++++++++++++++++++++++++++ + 8 files changed, 203 insertions(+), 13 deletions(-) + +diff --git a/NEWS b/NEWS +index 85f63503e..0b4b8f4d3 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,7 +1,8 @@ + Post-v20.12.0 + ------------------------- + - Support ECMP multiple nexthops for reroute router policies. +- - BFD protocol support according to RFC880 [0]. IPv6 is not suported yet. ++ - BFD protocol support according to RFC880 [0]. 
Introduce next-hop BFD ++ availability check for OVN static routes. IPv6 is not suported yet. + [0] https://tools.ietf.org/html/rfc5880) + + OVN v20.12.0 - 18 Dec 2020 +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 363bb0895..fa2bd73c3 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -7952,7 +7952,8 @@ route_hash(struct parsed_route *route) + * Otherwise return NULL. */ + static struct parsed_route * + parsed_routes_add(struct ovs_list *routes, +- const struct nbrec_logical_router_static_route *route) ++ const struct nbrec_logical_router_static_route *route, ++ struct hmap *bfd_connections) + { + /* Verify that the next hop is an IP address with an all-ones mask. */ + struct in6_addr nexthop; +@@ -7993,6 +7994,25 @@ parsed_routes_add(struct ovs_list *routes, + return NULL; + } + ++ const struct nbrec_bfd *nb_bt = route->bfd; ++ if (nb_bt && !strcmp(nb_bt->dst_ip, route->nexthop)) { ++ struct bfd_entry *bfd_e; ++ ++ bfd_e = bfd_port_lookup(bfd_connections, nb_bt->logical_port, ++ nb_bt->dst_ip); ++ if (bfd_e) { ++ bfd_e->ref = true; ++ } ++ ++ if (!strcmp(nb_bt->status, "admin_down")) { ++ nbrec_bfd_set_status(nb_bt, "down"); ++ } ++ ++ if (!strcmp(nb_bt->status, "down")) { ++ return NULL; ++ } ++ } ++ + struct parsed_route *pr = xzalloc(sizeof *pr); + pr->prefix = prefix; + pr->plen = plen; +@@ -9579,7 +9599,7 @@ build_ip_routing_flows_for_lrouter_port( + static void + build_static_route_flows_for_lrouter( + struct ovn_datapath *od, struct hmap *lflows, +- struct hmap *ports) ++ struct hmap *ports, struct hmap *bfd_connections) + { + if (od->nbr) { + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150, +@@ -9591,7 +9611,8 @@ build_static_route_flows_for_lrouter( + struct ecmp_groups_node *group; + for (int i = 0; i < od->nbr->n_static_routes; i++) { + struct parsed_route *route = +- parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]); ++ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i], ++ bfd_connections); 
+ if (!route) { + continue; + } +@@ -11571,7 +11592,8 @@ struct lswitch_flow_build_info { + + static void + build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, +- struct lswitch_flow_build_info *lsi) ++ struct lswitch_flow_build_info *lsi, ++ struct hmap *bfd_connections) + { + /* Build Logical Switch Flows. */ + build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups, lsi->lflows, +@@ -11591,7 +11613,8 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, + build_neigh_learning_flows_for_lrouter(od, lsi->lflows, &lsi->match, + &lsi->actions); + build_ND_RA_flows_for_lrouter(od, lsi->lflows); +- build_static_route_flows_for_lrouter(od, lsi->lflows, lsi->ports); ++ build_static_route_flows_for_lrouter(od, lsi->lflows, lsi->ports, ++ bfd_connections); + build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match, + &lsi->actions); + build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->ports); +@@ -11655,7 +11678,8 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct hmap *port_groups, struct hmap *lflows, + struct hmap *mcgroups, + struct hmap *igmp_groups, +- struct shash *meter_groups, struct hmap *lbs) ++ struct shash *meter_groups, struct hmap *lbs, ++ struct hmap *bfd_connections) + { + struct ovn_datapath *od; + struct ovn_port *op; +@@ -11682,7 +11706,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + * will move here and will be reogranized by iterator type. 
+ */ + HMAP_FOR_EACH (od, key_node, datapaths) { +- build_lswitch_and_lrouter_iterate_by_od(od, &lsi); ++ build_lswitch_and_lrouter_iterate_by_od(od, &lsi, bfd_connections); + } + HMAP_FOR_EACH (op, key_node, ports) { + build_lswitch_and_lrouter_iterate_by_op(op, &lsi); +@@ -11780,13 +11804,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, + struct hmap *ports, struct hmap *port_groups, + struct hmap *mcgroups, struct hmap *igmp_groups, + struct shash *meter_groups, +- struct hmap *lbs) ++ struct hmap *lbs, struct hmap *bfd_connections) + { + struct hmap lflows = HMAP_INITIALIZER(&lflows); + + build_lswitch_and_lrouter_flows(datapaths, ports, + port_groups, &lflows, mcgroups, +- igmp_groups, meter_groups, lbs); ++ igmp_groups, meter_groups, lbs, ++ bfd_connections); + + /* Collecting all unique datapath groups. */ + struct hmap dp_groups = HMAP_INITIALIZER(&dp_groups); +@@ -12795,7 +12820,7 @@ ovnnb_db_run(struct northd_context *ctx, + build_meter_groups(ctx, &meter_groups); + build_bfd_table(ctx, &bfd_connections, ports); + build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, +- &igmp_groups, &meter_groups, &lbs); ++ &igmp_groups, &meter_groups, &lbs, &bfd_connections); + ovn_update_ipv6_prefix(ports); + + sync_address_sets(ctx); +diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema +index aea932f55..29019809c 100644 +--- a/ovn-nb.ovsschema ++++ b/ovn-nb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Northbound", + "version": "5.31.0", +- "cksum": "1511492848 28473", ++ "cksum": "2352750632 28701", + "tables": { + "NB_Global": { + "columns": { +@@ -374,6 +374,10 @@ + "min": 0, "max": 1}}, + "nexthop": {"type": "string"}, + "output_port": {"type": {"key": "string", "min": 0, "max": 1}}, ++ "bfd": {"type": {"key": {"type": "uuid", "refTable": "BFD", ++ "refType": "weak"}, ++ "min": 0, ++ "max": 1}}, + "options": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 
cdc5e0f3a..105d8697e 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -2644,6 +2644,13 @@ +

    + + ++ ++

    ++ Reference to row if the route has associated a ++ BFD session ++

    ++
    ++ + + ovn-ic populates this key if the route is learned from the + global database. In this case the value +diff --git a/tests/atlocal.in b/tests/atlocal.in +index d9a4c91d4..5ebc8e117 100644 +--- a/tests/atlocal.in ++++ b/tests/atlocal.in +@@ -181,6 +181,9 @@ fi + # Set HAVE_DIBBLER-SERVER + find_command dibbler-server + ++# Set HAVE_BFDD_BEACON ++find_command bfdd-beacon ++ + # Turn off proxies. + unset http_proxy + unset https_proxy +diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at +index 01edfcbc1..2827b223c 100644 +--- a/tests/ovn-nbctl.at ++++ b/tests/ovn-nbctl.at +@@ -1617,7 +1617,13 @@ IPv6 Routes + 2001:db8::/64 2001:db8:0:f102::1 dst-ip lp0 + 2001:db8:1::/64 2001:db8:0:f103::1 dst-ip + ::/0 2001:db8:0:f101::1 dst-ip +-])]) ++]) ++ ++AT_CHECK([ovn-nbctl lrp-add lr0 lr0-p0 00:00:01:01:02:03 192.168.10.1/24]) ++bfd_uuid=$(ovn-nbctl create bfd logical_port=lr0-p0 dst_ip=100.0.0.50 status=down min_tx=250 min_rx=250 detect_mult=10) ++AT_CHECK([ovn-nbctl lr-route-add lr0 100.0.0.0/24 192.168.0.1]) ++route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/24") ++AT_CHECK([ovn-nbctl set logical_router_static_route $route_uuid bfd=$bfd_uuid])]) + + dnl --------------------------------------------------------------------- + +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index eee004328..91eb9a3d1 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2373,6 +2373,14 @@ check_column 1000 bfd min_tx logical_port=r0-sw1 + check_column 1000 bfd min_rx logical_port=r0-sw1 + check_column 100 bfd detect_mult logical_port=r0-sw1 + ++check ovn-nbctl lr-route-add r0 100.0.0.0/8 192.168.10.2 ++route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/8") ++check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid ++check_column down bfd status logical_port=r0-sw1 ++ ++check ovn-nbctl clear logical_router_static_route $route_uuid bfd ++check_column admin_down bfd status 
logical_port=r0-sw1 ++ + ovn-nbctl destroy bfd $uuid + check_row_count bfd 2 + +diff --git a/tests/system-ovn.at b/tests/system-ovn.at +index 1e73001ab..06d606166 100644 +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -5531,3 +5531,139 @@ as + OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d + /.*terminating with signal 15.*/d"]) + AT_CLEANUP ++ ++AT_SETUP([ovn -- BFD]) ++AT_SKIP_IF([test $HAVE_BFDD_BEACON = no]) ++AT_SKIP_IF([test $HAVE_TCPDUMP = no]) ++AT_KEYWORDS([ovn-bfd]) ++ ++ovn_start ++OVS_TRAFFIC_VSWITCHD_START() ++ ++ADD_BR([br-int]) ++ADD_BR([br-ext]) ++ ++check ovs-ofctl add-flow br-ext action=normal ++# Set external-ids in br-int needed for ovn-controller ++check ovs-vsctl \ ++ -- set Open_vSwitch . external-ids:system-id=hv1 \ ++ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ ++ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ ++ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ ++ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true ++ ++# Start ovn-controller ++start_daemon ovn-controller ++ ++check ovn-nbctl lr-add R1 ++ ++check ovn-nbctl ls-add sw0 ++check ovn-nbctl ls-add sw1 ++check ovn-nbctl ls-add public ++ ++check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 ++check ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 ++check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24 \ ++ -- lrp-set-gateway-chassis rp-public hv1 ++ ++check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ ++ type=router options:router-port=rp-sw0 \ ++ -- lsp-set-addresses sw0-rp router ++check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ ++ type=router options:router-port=rp-sw1 \ ++ -- lsp-set-addresses sw1-rp router ++ ++check ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \ ++ type=router options:router-port=rp-public \ ++ -- lsp-set-addresses public-rp router ++ ++ADD_NAMESPACES(sw01) 
++ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ ++ "192.168.1.1") ++check ovn-nbctl lsp-add sw0 sw01 \ ++ -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2" ++ ++ADD_NAMESPACES(sw11) ++ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \ ++ "192.168.2.1") ++check ovn-nbctl lsp-add sw1 sw11 \ ++ -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2" ++ ++ADD_NAMESPACES(server) ++NS_CHECK_EXEC([server], [ip link set dev lo up]) ++ADD_VETH(s1, server, br-ext, "172.16.1.50/24", "f0:00:00:01:02:05", \ ++ "172.16.1.1") ++ ++AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext]) ++check ovn-nbctl lsp-add public public1 \ ++ -- lsp-set-addresses public1 unknown \ ++ -- lsp-set-type public1 localnet \ ++ -- lsp-set-options public1 network_name=phynet ++ ++NS_CHECK_EXEC([server], [bfdd-beacon --listen=172.16.1.50], [0]) ++NS_CHECK_EXEC([server], [bfdd-control allow 172.16.1.1], [0], [dnl ++Allowing connections from 172.16.1.1 ++]) ++ ++uuid=$(ovn-nbctl create bfd logical_port=rp-public dst_ip=172.16.1.50 min_tx=250 min_rx=250 detect_mult=10) ++check ovn-nbctl lr-route-add R1 100.0.0.0/8 172.16.1.50 ++route_uuid=$(fetch_column nb:logical_router_static_route _uuid ip_prefix="100.0.0.0/8") ++check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid ++check ovn-nbctl --wait=hv sync ++ ++wait_column "up" nb:bfd status logical_port=rp-public ++OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 | grep 'match=(ip4.dst == 100.0.0.0/8)' | grep -q 172.16.1.50]) ++ ++# un-associate the bfd connection and the static route ++check ovn-nbctl clear logical_router_static_route $route_uuid bfd ++wait_column "admin_down" nb:bfd status logical_port=rp-public ++OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down]) ++NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &]) ++sleep 5 ++kill $(pidof tcpdump) ++AT_CHECK([grep -qi bfd bfd.pcap],[1]) ++ ++# restart the 
connection ++check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid ++wait_column "up" nb:bfd status logical_port=rp-public ++ ++# switch to gw router configuration ++check ovn-nbctl clear logical_router_static_route $route_uuid bfd ++wait_column "admin_down" nb:bfd status logical_port=rp-public ++OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down]) ++check ovn-nbctl clear logical_router_port rp-public gateway_chassis ++check ovn-nbctl set logical_router R1 options:chassis=hv1 ++check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid ++wait_column "up" nb:bfd status logical_port=rp-public ++ ++# stop bfd endpoint ++NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl ++stopping ++]) ++ ++wait_column "down" nb:bfd status logical_port=rp-public ++OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 | grep 'match=(ip4.dst == 100.0.0.0/8)' | grep 172.16.1.50)" = ""]) ++ ++# remove bfd entry ++ovn-nbctl destroy bfd $uuid ++check_row_count bfd 0 ++NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &]) ++sleep 5 ++kill $(pidof tcpdump) ++AT_CHECK([grep -qi bfd bfd.pcap],[1]) ++ ++kill $(pidof ovn-controller) ++ ++as ovn-sb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as ovn-nb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as northd ++OVS_APP_EXIT_AND_WAIT([ovn-northd]) ++ ++as ++OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d ++/.*terminating with signal 15.*/d"]) ++AT_CLEANUP +-- +2.29.2 + diff --git a/SPECS/ovn2.13.spec b/SPECS/ovn2.13.spec index 1d3e6c6..105c0d4 100644 --- a/SPECS/ovn2.13.spec +++ b/SPECS/ovn2.13.spec @@ -60,7 +60,7 @@ Summary: Open Virtual Network support Group: System Environment/Daemons URL: http://www.openvswitch.org/ Version: %{upstreamver}.0 -Release: 1%{?commit0:.%{date}git%{shortcommit0}}%{?dist} +Release: 17%{?commit0:.%{date}git%{shortcommit0}}%{?dist} Provides: openvswitch%{pkgver}-ovn-common = %{?epoch:%{epoch}:}%{version}-%{release} Obsoletes: 
openvswitch%{pkgver}-ovn-common < 2.11.0-1 @@ -119,6 +119,82 @@ Patch05: 0005-northd-Add-ECMP-support-to-router-policies.patch Patch06: 0006-osx-Fix-compilation-error.patch Patch07: 0007-tests-Make-ovn-ovn-controller-incremental-processing.patch +# Bug 1909650 +Patch10: 0001-ovn-trace-fix-trigger_event-warning.patch + +# Bug 1914304 +Patch20: 0001-binding-Do-not-clear-container-lbinding-pb-when-pare.patch + +# Bug 1847570 +Patch30: 0001-ovn-northd-Move-lswitch-ARP-ND-Responder-to-function.patch +Patch31: 0002-ovn-northd-Move-DHCP-Options-and-Response-to-a-funct.patch +Patch32: 0003-ovn-northd-Move-lswitch-DNS-lookup-and-response-to-a.patch +Patch33: 0004-ovn-northd-Move-DNS-and-DHCP-defaults-to-a-function.patch +Patch34: 0005-ovn-northd-Move-ARP-response-for-external-ports-to-a.patch +Patch35: 0006-ovn-northd-Move-broadcast-and-multicast-lookup-in-ls.patch +Patch36: 0007-ovn-northd-Move-destination-handling-into-functions.patch +Patch37: 0008-ovn-northd-split-build_lswitch_output_port_sec-into-.patch +Patch38: 0009-ovn-northd-Move-lrouter-arp-and-nd-datapath-processi.patch +Patch39: 0010-ovn-northd-Move-ipv4-input-to-a-function.patch +Patch40: 0011-ovn-northd-move-NAT-Defrag-and-lb-to-a-function.patch +Patch41: 0012-controller-introduce-BFD-tx-path-in-ovn-controller.patch +Patch42: 0013-action-introduce-handle_bfd_msg-action.patch +Patch43: 0014-controller-bfd-introduce-BFD-state-machine.patch +Patch44: 0015-bfd-support-demand-mode-on-rx-side.patch +Patch45: 0016-ovn-integrate-bfd-for-static-routes.patch + +# Bug 1915739 +Patch50: 0001-bfd-introduce-IPv6-support.patch + +# Bug 1918004 +Patch60: 0001-northd-Fix-ACL-fair-log-meters-for-Port_Group-ACLs.patch + +# Bug 1917533 +Patch70: 0001-binding-Fix-container-port-removal-from-local-bindin.patch +Patch71: 0002-binding-Always-delete-child-port-bindings-first.patch + +# Bug 1918582 +# Bug 1919055 +Patch80: 0001-northd-Fix-duplicate-logical-port-detection.patch + +# Bug 1919812 +Patch90: 
0001-ovn-controller-Fix-wrong-conj_id-match-flows-when-ca.patch + +# Bug 1917979 +Patch100: 0001-ovn-ctl-Add-support-for-ovsdb-server-disable-file-co.patch + +# Bug 1857106 +Patch110: 0001-controller-fix-pkt_marking-with-IP-buffering.patch + +# Bug 1915958 +Patch120: 0001-ovn-nbctl-add-bfd-report-to-lr-route-list-command.patch +Patch121: 0002-ovn-nbctl-add-ecmp-ecmp-symmetric-reply-to-lr-route-.patch + +# Bug 1918422 +Patch130: 0001-northd-add-event-option-to-enable-controller_event-f.patch + +# Bug 1839102 +Patch140: 0001-ofctrl-Rename-nb_cfg-to-req_cfg.patch +Patch141: 0002-controller-Implement-a-generic-barrier-based-on-ofct.patch +Patch142: 0003-binding-Set-Logical_Switch_Port.up-when-all-OVS-flow.patch + +# Bug 1918997 +Patch150: 0001-ovn-nbctl-add-bfd-option-to-lr-route-add.patch + +# Bug 1926165 +Patch160: 0001-binding-Correctly-set-Port_Binding.up-for-container-.patch +Patch161: 0002-binding-Set-Port_Binding.up-only-if-supported.patch +Patch162: 0003-northd-Allow-backwards-compatibility-for-Logical_Swi.patch +Patch163: 0004-tests-Fix-Port_Binding-up-test.patch + +# Bug 1908540 +# Bug 1917875 +Patch170: 0001-Support-configuring-Load-Balancer-hairpin-source-IP.patch +Patch171: 0002-lflow-Use-learn-action-to-generate-LB-hairpin-reply-.patch + +# Bug 1927230 +Patch180: 0001-northd-Skip-matching-on-ct-flags-for-stateless-confi.patch + # OpenvSwitch backports (800-) if required. # FIXME Sphinx is used to generate some manpages, unfortunately, on RHEL, it's @@ -165,10 +241,7 @@ Requires(preun): systemd-units Requires(postun): systemd-units # to skip running checks, pass --without check -# Disable Tests due to https://bugs.centos.org/view.php?id=16969, tests failing -# as build is running on CentOS7 builder, once builders are CentOS8 based tests can -# be re enabled. 
-%bcond_with check +%bcond_without check %description OVN, the Open Virtual Network, is a system to support virtual network @@ -563,6 +636,74 @@ fi %{_unitdir}/ovn-controller-vtep.service %changelog +* Wed Feb 10 2021 Mark Michelson - 20.12.0-17 +- Backport "northd: Skip matching on ct flags for stateless" (#1927230) + +* Tue Feb 09 2021 Dumitru Ceara - 20.12.0-16 +- Backport "Support configuring Load Balancer hairpin source IP." (#1908540) +- Backport "lflow: Use learn() action to generate LB hairpin reply flows." (#1917875) + +* Mon Feb 08 2021 Dumitru Ceara - 20.12.0-15 +- Backport "binding: Correctly set Port_Binding.up for container/virtual ports." (#1926165) +- Backport "binding: Set Port_Binding.up only if supported." (#1926165) +- Backport "northd: Allow backwards compatibility for Logical_Switch_Port.up." (#1926165) +- Backport "tests: Fix Port_Binding up test." (#1926165) + +* Wed Feb 03 2021 Lorenzo Bianconi - 20.12.0-14 +- Backport "ovn-nbctl: add --bfd option to lr-route-add" (#1918997) + +* Fri Jan 29 2021 Dumitru Ceara - 20.12.0-13 +- Backport "ofctrl: Rename 'nb_cfg' to 'req_cfg'." (#1839102) +- Backport "controller: Implement a generic barrier based on ofctrl cur_cfg sync." (#1839102) +- Backport "binding: Set Logical_Switch_Port.up when all OVS flows are installed." (#1839102) + +* Thu Jan 28 2021 Lorenzo Bianconi - 20.12.0-12 +- Backport "northd: add --event option to enable controller_event for empty_lb" (#1918422) + +* Thu Jan 28 2021 Lorenzo Bianconi - 20.12.0-11 +- Backport "ovn-nbctl: add bfd report to lr-route-list command" (#1915958) +- Backport "ovn-nbctl: add ecmp/ecmp-symmetric-reply to lr-route-list command" + +* Thu Jan 28 2021 Lorenzo Bianconi - 20.12.0-10 +- Backport "controller: fix pkt_marking with IP buffering" (#1857106) + +* Wed Jan 27 2021 Numan Siddique - 20.12.0-9 +- Backport "ovn-ctl: Add support for ovsdb-server --disable-file-column-diff." 
(#1917979) + +* Wed Jan 27 2021 Numan Siddique - 20.12.0-8 +- Backport "ovn-controller: Fix wrong conj_id match flows when caching is enabled." (#1919812) + +* Mon Jan 25 2021 Dumitru Ceara - 20.12.0-7 +- Backport "northd: Fix duplicate logical port detection." (#1918582) + +* Wed Jan 20 2021 Dumitru Ceara - 20.12.0-6 +- Backport "northd: Fix ACL fair log meters for Port_Group ACLs." (#1918004) +- Backport "binding: Fix container port removal from local bindings." (#1917533) +- Backport "binding: Always delete child port bindings first." (#1917533) + +* Wed Jan 13 2021 Lorenzo Bianconi - 20.12.0-5 +- Backport "bfd: introduce IPv6 support" (#1915739) + +* Tue Jan 12 2021 Lorenzo Bianconi - 20.12.0-4 +- Backport "introduce BFD support in ovn-controller" (#1847570) +- Backport "ovn-northd: Move lswitch ARP/ND Responder to functions" +- Backport "ovn-northd: Move DHCP Options and Response to a function" +- Backport "ovn-northd: Move lswitch DNS lookup and response to a function" +- Backport "ovn-northd: Move DNS and DHCP defaults to a function" +- Backport "ovn-northd: Move ARP response for external ports to a function." +- Backport "ovn-northd: Move broadcast and multicast lookup in lswitch to a function" +- Backport "ovn-northd: Move destination handling into functions." +- Backport "ovn-northd: split build_lswitch_output_port_sec into iterators" +- Backport "ovn-northd: Move lrouter arp and nd datapath processing to a function" +- Backport "ovn-northd: Move ipv4 input to a function" +- Backport "ovn-northd: move NAT, Defrag and lb to a function" + +* Fri Jan 08 2021 Dumitru Ceara - 20.12.0-3 +- Backport "binding: Do not clear container lbinding->pb when parent is deleted." (#1914304) + +* Fri Jan 08 2021 Lorenzo Bianconi - 20.12.0-2 +- Backport "ovn-trace: fix trigger_event warning" (#1909650) + * Fri Dec 18 2020 Numan Siddique - 20.12.0-1 - Rebase to OVN v20.12.0. - Re-backport patches for #1883957 and #1881826 as there are not in v20.12.0.