From 83e4bc25561e928f516fe4206f87b4f8daed2c73 Mon Sep 17 00:00:00 2001
From: yatinkarel
+ If the logical datapath has any ACL or a load balancer with VIP + configured, the following flow will also be added: +
-+ + +reg2
. For IPv6 traffic the flow also loads the original
+ destination IP and transport port in registers xxreg1
and
+ reg2
.
-+ --reject
option and
@@ -9364,7 +10088,7 @@ index a9a3a9f4f..ace16281c 100644
+ whenever an incoming packet is received for this load-balancer.
+ Please note using --reject
option will disable
+ empty_lb SB controller event for this load balancer.
- This table implements switching behavior. It contains these logical -@@ -1481,12 +1584,58 @@ output; +@@ -1406,6 +1509,14 @@ output; + logical ports. +
MC_FLOOD
multicast group which
++ contains all connected logical ports.
++ ip
@@ -9597,7 +10336,7 @@ index a9a3a9f4f..ace16281c 100644
-@@ -1542,20 +1693,39 @@ output; +@@ -1542,20 +1701,39 @@ output;
@@ -9626,14 +10365,14 @@ index a9a3a9f4f..ace16281c 100644
+ (with a match for reg0[0] == 1
) by using the
+ ct_next;
action.
+
-
--
from-lport
ACL hintsfrom-lport
ACL hintsfrom-lport
ACL hints
This is similar to ingress table ACL hints
.
@@ -9644,7 +10383,7 @@ index a9a3a9f4f..ace16281c 100644
This is similar to ingress table ACLs
except for
-@@ -1592,28 +1762,28 @@ output;
+@@ -1592,28 +1770,28 @@ output;
@@ -9677,7 +10416,7 @@ index a9a3a9f4f..ace16281c 100644
This is similar to the port security logic in table
-@@ -1623,7 +1793,7 @@ output;
+@@ -1623,7 +1801,7 @@ output;
ip4.src
and ip6.src
This is similar to the ingress port security logic in ingress table -@@ -1926,6 +2096,27 @@ next; +@@ -1926,6 +2104,27 @@ next;
@@ -9714,7 +10453,7 @@ index a9a3a9f4f..ace16281c 100644L3 admission control: A priority-100 flow drops packets that match -@@ -2121,8 +2312,7 @@ eth.src = xreg0[0..47]; +@@ -2121,8 +2320,7 @@ eth.src = xreg0[0..47]; arp.op = 2; /* ARP reply. */ arp.tha = arp.sha; arp.sha = xreg0[0..47]; @@ -9724,7 +10463,16 @@ index a9a3a9f4f..ace16281c 100644 outport = inport; flags.loopback = 1; output; -@@ -2449,6 +2639,16 @@ icmp6 { +@@ -2391,7 +2589,7 @@ icmp6 { + +
+ If ECMP routes with symmetric reply are configured in the
+- OVN_Northbound
database for a gateway router, a priority-100
++ OVN_Northbound
database for a gateway router, a priority-300
+ flow is added for each router port on which symmetric replies are
+ configured. The matching logic for these ports essentially reverses the
+ configured logic of the ECMP route. So for instance, a route with a
+@@ -2449,6 +2647,16 @@ icmp6 {
with an action ct_snat;
.
If the Gateway router has been configured to force SNAT any
previously load-balanced packets to B, a priority-100 flow
-@@ -2548,7 +2748,11 @@ icmp6 {
+@@ -2548,7 +2756,11 @@ icmp6 {
(and optional port numbers) to load balance to. If the router is
configured to force SNAT any load-balanced packets, the above action
will be replaced by flags.force_snat_for_lb = 1;
@@ -9754,7 +10502,7 @@ index a9a3a9f4f..ace16281c 100644
args will only contain those endpoints whose service
monitor status entry in
OVN_Southbound
db is
either online
or empty.
-@@ -2565,6 +2769,9 @@ icmp6 {
+@@ -2565,6 +2777,9 @@ icmp6 {
with an action of ct_dnat;
. If the router is
configured to force SNAT any load-balanced packets, the above action
will be replaced by flags.force_snat_for_lb = 1; ct_dnat;
.
@@ -9764,7 +10512,7 @@ index a9a3a9f4f..ace16281c 100644
flags.force_snat_for_lb = 1;
ct_lb(args);
.
@@ -9774,7 +10522,7 @@ index a9a3a9f4f..ace16281c 100644
flags.force_snat_for_lb = 1; ct_dnat;
.
@@ -9793,7 +10541,7 @@ index a9a3a9f4f..ace16281c 100644
@@ -9832,7 +10580,7 @@ index a9a3a9f4f..ace16281c 100644 flags.loopback = 1; next; -@@ -3053,7 +3297,51 @@ next; +@@ -3053,7 +3305,51 @@ next;
Any packet that reaches this table is an IP packet whose next-hop
-@@ -3239,7 +3527,7 @@ next;
+@@ -3173,7 +3469,12 @@ next;
+ column
+ of table for of type
+ dnat_and_snat
, otherwise the Ethernet address of the
+- distributed logical router port.
++ distributed logical router port. Note that if the
++ is not
++ within a subnet on the owning logical router, then OVN will only
++ create ARP resolution flows if the
++ is set to true
. Otherwise, no ARP resolution flows
++ will be added.
+
+@@ -3239,7 +3540,7 @@ next; @@ -9894,7 +10656,7 @@ index a9a3a9f4f..ace16281c 100644
For distributed logical routers with distributed gateway port configured -@@ -3269,7 +3557,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; +@@ -3269,7 +3570,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; and advances to the next table.
@@ -9903,7 +10665,7 @@ index a9a3a9f4f..ace16281c 100644For distributed logical routers with distributed gateway port configured -@@ -3330,7 +3618,7 @@ icmp6 { +@@ -3330,7 +3631,7 @@ icmp6 { and advances to the next table.
@@ -9912,7 +10674,21 @@ index a9a3a9f4f..ace16281c 100644For distributed logical routers where one of the logical router -@@ -3370,7 +3658,7 @@ icmp6 { +@@ -3353,6 +3654,13 @@ icmp6 { + external ip and D is NAT external mac. + + ++
outport == GW
has actions
+@@ -3370,7 +3678,7 @@ icmp6 {
In the common case where the Ethernet destination has been resolved, this
-@@ -3546,6 +3834,41 @@ nd_ns {
+@@ -3546,6 +3854,41 @@ nd_ns {
flags.force_snat_for_dnat == 1 && ip
with an
action ct_snat(B);
.
If the Gateway router in the OVN Northbound database has been
configured to force SNAT a packet (that has been previously
-@@ -3553,6 +3876,9 @@ nd_ns {
+@@ -3553,6 +3896,9 @@ nd_ns {
flags.force_snat_for_lb == 1 && ip
with an
action ct_snat(B);
.
For each configuration in the OVN Northbound database, that asks
to change the source IP address of a packet from an IP address of
-@@ -3566,14 +3892,18 @@ nd_ns {
+@@ -3566,14 +3912,18 @@ nd_ns {
options, then the action would be ip4/6.src=
(B)
.
If the NAT rule has exempted_ext_ips
set, then
there is an additional flow configured at the priority + 1 of
-@@ -3582,7 +3912,9 @@ nd_ns {
+@@ -3582,7 +3932,9 @@ nd_ns {
. This flow is used to bypass the ct_snat action for a packet
which is destinted to exempted_ext_ips
.
1
has actions
next;
.
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
-index 5a3227568..e78a71728 100644
+index 5a3227568..f2daeacbd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -37,10 +37,13 @@
@@ -10176,15 +10952,23 @@ index 5a3227568..e78a71728 100644
/* IPAM data. */
struct ipam_info ipam_info;
-@@ -633,6 +668,7 @@ struct ovn_datapath {
+@@ -633,6 +668,15 @@ struct ovn_datapath {
struct lport_addresses dnat_force_snat_addrs;
struct lport_addresses lb_force_snat_addrs;
+ bool lb_force_snat_router_ip;
++ /* The "routable" ssets are subsets of the load balancer
++ * IPs for which IP routes and ARP resolution flows are automatically
++ * added.
++ */
++ struct sset lb_ips_v4;
++ struct sset lb_ips_v4_routable;
++ struct sset lb_ips_v6;
++ struct sset lb_ips_v6_routable;
struct ovn_port **localnet_ports;
size_t n_localnet_ports;
-@@ -723,14 +759,28 @@ init_nat_entries(struct ovn_datapath *od)
+@@ -723,14 +767,28 @@ init_nat_entries(struct ovn_datapath *od)
}
}
@@ -10221,7 +11005,44 @@ index 5a3227568..e78a71728 100644
}
}
-@@ -872,6 +922,20 @@ ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
+@@ -785,6 +843,28 @@ destroy_nat_entries(struct ovn_datapath *od)
+ }
+ }
+
++static void
++init_lb_ips(struct ovn_datapath *od)
++{
++ sset_init(&od->lb_ips_v4);
++ sset_init(&od->lb_ips_v4_routable);
++ sset_init(&od->lb_ips_v6);
++ sset_init(&od->lb_ips_v6_routable);
++}
++
++static void
++destroy_lb_ips(struct ovn_datapath *od)
++{
++ if (!od->nbs && !od->nbr) {
++ return;
++ }
++
++ sset_destroy(&od->lb_ips_v4);
++ sset_destroy(&od->lb_ips_v4_routable);
++ sset_destroy(&od->lb_ips_v6);
++ sset_destroy(&od->lb_ips_v6_routable);
++}
++
+ /* A group of logical router datapaths which are connected - either
+ * directly or indirectly.
+ * Each logical router can belong to only one group. */
+@@ -843,6 +923,7 @@ ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
+ bitmap_free(od->ipam_info.allocated_ipv4s);
+ free(od->router_ports);
+ destroy_nat_entries(od);
++ destroy_lb_ips(od);
+ free(od->nat_entries);
+ free(od->localnet_ports);
+ ovn_ls_port_group_destroy(&od->nb_pgs);
+@@ -872,6 +953,20 @@ ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
return NULL;
}
@@ -10242,7 +11063,52 @@ index 5a3227568..e78a71728 100644
static bool
ovn_datapath_is_stale(const struct ovn_datapath *od)
{
-@@ -1472,6 +1536,8 @@ struct ovn_port {
+@@ -1259,6 +1354,7 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
+
+ init_ipam_info_for_datapath(od);
+ init_mcast_info_for_datapath(od);
++ init_lb_ips(od);
+ }
+
+ const struct nbrec_logical_router *nbr;
+@@ -1290,6 +1386,7 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
+ }
+ init_mcast_info_for_datapath(od);
+ init_nat_entries(od);
++ init_lb_ips(od);
+ ovs_list_push_back(lr_list, &od->lr_list);
+ }
+ }
+@@ -1420,6 +1517,19 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths,
+ }
+ }
+
++/* Structure representing logical router port
++ * routable addresses. This includes DNAT and Load Balancer
++ * addresses. This structure will only be filled in if the
++ * router port is a distributed gateway port. Otherwise, all pointers
++ * will be NULL and n_addrs will be 0.
++ */
++struct ovn_port_routable_addresses {
++ /* The parsed routable addresses */
++ struct lport_addresses *laddrs;
++ /* Number of items in the laddrs array */
++ size_t n_addrs;
++};
++
+ /* A logical switch port or logical router port.
+ *
+ * In steady state, an ovn_port points to a northbound Logical_Switch_Port
+@@ -1463,6 +1573,8 @@ struct ovn_port {
+
+ struct lport_addresses lrp_networks;
+
++ struct ovn_port_routable_addresses routables;
++
+ /* Logical port multicast data. */
+ struct mcast_port_info mcast_info;
+
+@@ -1472,6 +1584,8 @@ struct ovn_port {
bool has_unknown; /* If the addresses have 'unknown' defined. */
@@ -10251,7 +11117,64 @@ index 5a3227568..e78a71728 100644
/* The port's peer:
*
* - A switch port S of type "router" has a router port R as a peer,
-@@ -1543,17 +1609,38 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
+@@ -1487,6 +1601,47 @@ struct ovn_port {
+ struct ovs_list list; /* In list of similar records. */
+ };
+
++static void
++destroy_routable_addresses(struct ovn_port_routable_addresses *ra)
++{
++ for (size_t i = 0; i < ra->n_addrs; i++) {
++ destroy_lport_addresses(&ra->laddrs[i]);
++ }
++ free(ra->laddrs);
++}
++
++static char **get_nat_addresses(const struct ovn_port *op, size_t *n,
++ bool routable_only);
++
++static void
++assign_routable_addresses(struct ovn_port *op)
++{
++ size_t n;
++ char **nats = get_nat_addresses(op, &n, true);
++
++ if (!nats) {
++ return;
++ }
++
++ struct lport_addresses *laddrs = xcalloc(n, sizeof(*laddrs));
++ size_t n_addrs = 0;
++ for (size_t i = 0; i < n; i++) {
++ int ofs;
++ if (!extract_addresses(nats[i], &laddrs[n_addrs], &ofs)) {
++ free(nats[i]);
++ continue;
++ }
++ n_addrs++;
++ free(nats[i]);
++ }
++ free(nats);
++
++ /* Everything seems to have worked out */
++ op->routables.laddrs = laddrs;
++ op->routables.n_addrs = n_addrs;
++}
++
++
+ static void
+ ovn_port_set_nb(struct ovn_port *op,
+ const struct nbrec_logical_switch_port *nbsp,
+@@ -1536,6 +1691,8 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
+ }
+ free(port->ps_addrs);
+
++ destroy_routable_addresses(&port->routables);
++
+ destroy_lport_addresses(&port->lrp_networks);
+ free(port->json_key);
+ free(port->key);
+@@ -1543,17 +1700,38 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
}
}
@@ -10293,7 +11216,29 @@ index 5a3227568..e78a71728 100644
}
/* Returns true if the logical switch port 'enabled' column is empty or
-@@ -2336,15 +2423,13 @@ join_logical_ports(struct northd_context *ctx,
+@@ -1608,6 +1786,21 @@ ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
+ return false;
+ }
+
++static struct ovn_port *
++ovn_port_get_peer(struct hmap *ports, struct ovn_port *op)
++{
++ if (!op->nbsp || !lsp_is_router(op->nbsp) || op->derived) {
++ return NULL;
++ }
++
++ const char *peer_name = smap_get(&op->nbsp->options, "router-port");
++ if (!peer_name) {
++ return NULL;
++ }
++
++ return ovn_port_find(ports, peer_name);
++}
++
+ static void
+ ipam_insert_mac(struct eth_addr *ea, bool check)
+ {
+@@ -2336,15 +2529,13 @@ join_logical_ports(struct northd_context *ctx,
for (size_t i = 0; i < od->nbs->n_ports; i++) {
const struct nbrec_logical_switch_port *nbsp
= od->nbs->ports[i];
@@ -10316,7 +11261,7 @@ index 5a3227568..e78a71728 100644
ovn_port_set_nb(op, nbsp, NULL);
ovs_list_remove(&op->list);
-@@ -2435,16 +2520,15 @@ join_logical_ports(struct northd_context *ctx,
+@@ -2435,16 +2626,15 @@ join_logical_ports(struct northd_context *ctx,
continue;
}
@@ -10342,7 +11287,7 @@ index 5a3227568..e78a71728 100644
ovn_port_set_nb(op, NULL, nbrp);
ovs_list_remove(&op->list);
ovs_list_push_back(both, &op->list);
-@@ -2487,7 +2571,7 @@ join_logical_ports(struct northd_context *ctx,
+@@ -2487,7 +2677,7 @@ join_logical_ports(struct northd_context *ctx,
char *redirect_name =
ovn_chassis_redirect_name(nbrp->name);
struct ovn_port *crp = ovn_port_find(ports, redirect_name);
@@ -10351,7 +11296,145 @@ index 5a3227568..e78a71728 100644
crp->derived = true;
ovn_port_set_nb(crp, NULL, nbrp);
ovs_list_remove(&crp->list);
-@@ -3179,6 +3263,12 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+@@ -2505,6 +2695,8 @@ join_logical_ports(struct northd_context *ctx,
+ * use during flow creation. */
+ od->l3dgw_port = op;
+ od->l3redirect_port = crp;
++
++ assign_routable_addresses(op);
+ }
+ }
+ }
+@@ -2515,12 +2707,7 @@ join_logical_ports(struct northd_context *ctx,
+ struct ovn_port *op;
+ HMAP_FOR_EACH (op, key_node, ports) {
+ if (op->nbsp && lsp_is_router(op->nbsp) && !op->derived) {
+- const char *peer_name = smap_get(&op->nbsp->options, "router-port");
+- if (!peer_name) {
+- continue;
+- }
+-
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ struct ovn_port *peer = ovn_port_get_peer(ports, op);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
+@@ -2578,46 +2765,6 @@ join_logical_ports(struct northd_context *ctx,
+ }
+ }
+
+-static void
+-get_router_load_balancer_ips(const struct ovn_datapath *od,
+- struct sset *all_ips_v4, struct sset *all_ips_v6)
+-{
+- if (!od->nbr) {
+- return;
+- }
+-
+- for (int i = 0; i < od->nbr->n_load_balancer; i++) {
+- struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
+- struct smap *vips = &lb->vips;
+- struct smap_node *node;
+-
+- SMAP_FOR_EACH (node, vips) {
+- /* node->key contains IP:port or just IP. */
+- char *ip_address;
+- uint16_t port;
+- int addr_family;
+-
+- if (!ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
+- &addr_family)) {
+- continue;
+- }
+-
+- struct sset *all_ips;
+- if (addr_family == AF_INET) {
+- all_ips = all_ips_v4;
+- } else {
+- all_ips = all_ips_v6;
+- }
+-
+- if (!sset_contains(all_ips, ip_address)) {
+- sset_add(all_ips, ip_address);
+- }
+-
+- free(ip_address);
+- }
+- }
+-}
+-
+ /* Returns an array of strings, each consisting of a MAC address followed
+ * by one or more IP addresses, and if the port is a distributed gateway
+ * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
+@@ -2629,11 +2776,11 @@ get_router_load_balancer_ips(const struct ovn_datapath *od,
+ * The caller must free each of the n returned strings with free(),
+ * and must free the returned array when it is no longer needed. */
+ static char **
+-get_nat_addresses(const struct ovn_port *op, size_t *n)
++get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only)
+ {
+ size_t n_nats = 0;
+ struct eth_addr mac;
+- if (!op->nbrp || !op->od || !op->od->nbr
++ if (!op || !op->nbrp || !op->od || !op->od->nbr
+ || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
+ || !eth_addr_from_string(op->nbrp->mac, &mac)) {
+ *n = n_nats;
+@@ -2652,6 +2799,12 @@ get_nat_addresses(const struct ovn_port *op, size_t *n)
+ const struct nbrec_nat *nat = op->od->nbr->nat[i];
+ ovs_be32 ip, mask;
+
++ if (routable_only &&
++ (!strcmp(nat->type, "snat") ||
++ !smap_get_bool(&nat->options, "add_route", false))) {
++ continue;
++ }
++
+ char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
+ if (error || mask != OVS_BE32_MAX) {
+ free(error);
+@@ -2702,22 +2855,26 @@ get_nat_addresses(const struct ovn_port *op, size_t *n)
+ }
+ }
+
+- /* Two sets to hold all load-balancer vips. */
+- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
+- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+-
+ const char *ip_address;
+- SSET_FOR_EACH (ip_address, &all_ips_v4) {
+- ds_put_format(&c_addresses, " %s", ip_address);
+- central_ip_address = true;
+- }
+- SSET_FOR_EACH (ip_address, &all_ips_v6) {
+- ds_put_format(&c_addresses, " %s", ip_address);
+- central_ip_address = true;
++ if (routable_only) {
++ SSET_FOR_EACH (ip_address, &op->od->lb_ips_v4_routable) {
++ ds_put_format(&c_addresses, " %s", ip_address);
++ central_ip_address = true;
++ }
++ SSET_FOR_EACH (ip_address, &op->od->lb_ips_v6_routable) {
++ ds_put_format(&c_addresses, " %s", ip_address);
++ central_ip_address = true;
++ }
++ } else {
++ SSET_FOR_EACH (ip_address, &op->od->lb_ips_v4) {
++ ds_put_format(&c_addresses, " %s", ip_address);
++ central_ip_address = true;
++ }
++ SSET_FOR_EACH (ip_address, &op->od->lb_ips_v6) {
++ ds_put_format(&c_addresses, " %s", ip_address);
++ central_ip_address = true;
++ }
+ }
+- sset_destroy(&all_ips_v4);
+- sset_destroy(&all_ips_v6);
+
+ if (central_ip_address) {
+ /* Gratuitous ARP for centralized NAT rules on distributed gateway
+@@ -3179,6 +3336,12 @@ ovn_port_update_sbrec(struct northd_context *ctx,
} else {
sbrec_port_binding_set_ha_chassis_group(op->sb, NULL);
}
@@ -10364,7 +11447,40 @@ index 5a3227568..e78a71728 100644
}
} else {
const char *chassis = NULL;
-@@ -3308,6 +3398,14 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+@@ -3210,7 +3373,6 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+ } else {
+ sbrec_port_binding_set_options(op->sb, NULL);
+ }
+-
+ const char *nat_addresses = smap_get(&op->nbsp->options,
+ "nat-addresses");
+ size_t n_nats = 0;
+@@ -3218,7 +3380,7 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+ if (nat_addresses && !strcmp(nat_addresses, "router")) {
+ if (op->peer && op->peer->od
+ && (chassis || op->peer->od->l3redirect_port)) {
+- nats = get_nat_addresses(op->peer, &n_nats);
++ nats = get_nat_addresses(op->peer, &n_nats, false);
+ }
+ /* Only accept manual specification of ethernet address
+ * followed by IPv4 addresses on type "l3gateway" ports. */
+@@ -3266,6 +3428,7 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+ if (add_router_port_garp) {
+ struct ds garp_info = DS_EMPTY_INITIALIZER;
+ ds_put_format(&garp_info, "%s", op->peer->lrp_networks.ea_s);
++
+ for (size_t i = 0; i < op->peer->lrp_networks.n_ipv4_addrs;
+ i++) {
+ ds_put_format(&garp_info, " %s",
+@@ -3282,7 +3445,6 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+ nats[n_nats - 1] = ds_steal_cstr(&garp_info);
+ ds_destroy(&garp_info);
+ }
+-
+ sbrec_port_binding_set_nat_addresses(op->sb,
+ (const char **) nats, n_nats);
+ for (size_t i = 0; i < n_nats; i++) {
+@@ -3308,6 +3470,14 @@ ovn_port_update_sbrec(struct northd_context *ctx,
if (op->tunnel_key != op->sb->tunnel_key) {
sbrec_port_binding_set_tunnel_key(op->sb, op->tunnel_key);
}
@@ -10379,7 +11495,7 @@ index 5a3227568..e78a71728 100644
}
/* Remove mac_binding entries that refer to logical_ports which are
-@@ -3340,6 +3438,26 @@ cleanup_sb_ha_chassis_groups(struct northd_context *ctx,
+@@ -3340,6 +3510,26 @@ cleanup_sb_ha_chassis_groups(struct northd_context *ctx,
}
}
@@ -10406,7 +11522,114 @@ index 5a3227568..e78a71728 100644
struct service_monitor_info {
struct hmap_node hmap_node;
const struct sbrec_service_monitor *sbrec_mon;
-@@ -3436,12 +3554,12 @@ ovn_lb_svc_create(struct northd_context *ctx, struct ovn_northd_lb *lb,
+@@ -3381,67 +3571,83 @@ create_or_get_service_mon(struct northd_context *ctx,
+
+ static void
+ ovn_lb_svc_create(struct northd_context *ctx, struct ovn_northd_lb *lb,
+- struct hmap *monitor_map)
++ struct hmap *monitor_map, struct hmap *ports)
+ {
+ for (size_t i = 0; i < lb->n_vips; i++) {
+ struct ovn_lb_vip *lb_vip = &lb->vips[i];
+ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i];
+
+- if (!lb_vip_nb->lb_health_check) {
+- continue;
+- }
+-
+ for (size_t j = 0; j < lb_vip->n_backends; j++) {
+ struct ovn_lb_backend *backend = &lb_vip->backends[j];
+ struct ovn_northd_lb_backend *backend_nb =
+ &lb_vip_nb->backends_nb[j];
+
+- if (backend_nb->op && backend_nb->svc_mon_src_ip) {
+- const char *protocol = lb->nlb->protocol;
+- if (!protocol || !protocol[0]) {
+- protocol = "tcp";
+- }
+- backend_nb->health_check = true;
+- struct service_monitor_info *mon_info =
+- create_or_get_service_mon(ctx, monitor_map,
+- backend->ip_str,
+- backend_nb->op->nbsp->name,
+- backend->port,
+- protocol);
+-
+- ovs_assert(mon_info);
+- sbrec_service_monitor_set_options(
+- mon_info->sbrec_mon, &lb_vip_nb->lb_health_check->options);
+- struct eth_addr ea;
+- if (!mon_info->sbrec_mon->src_mac ||
+- !eth_addr_from_string(mon_info->sbrec_mon->src_mac, &ea) ||
+- !eth_addr_equals(ea, svc_monitor_mac_ea)) {
+- sbrec_service_monitor_set_src_mac(mon_info->sbrec_mon,
+- svc_monitor_mac);
++ struct ovn_port *op = NULL;
++ char *svc_mon_src_ip = NULL;
++ const char *s = smap_get(&lb->nlb->ip_port_mappings,
++ backend->ip_str);
++ if (s) {
++ char *port_name = xstrdup(s);
++ char *p = strstr(port_name, ":");
++ if (p) {
++ *p = 0;
++ p++;
++ op = ovn_port_find(ports, port_name);
++ svc_mon_src_ip = xstrdup(p);
+ }
++ free(port_name);
++ }
+
+- if (!mon_info->sbrec_mon->src_ip ||
+- strcmp(mon_info->sbrec_mon->src_ip,
+- backend_nb->svc_mon_src_ip)) {
+- sbrec_service_monitor_set_src_ip(
+- mon_info->sbrec_mon,
+- backend_nb->svc_mon_src_ip);
+- }
++ backend_nb->op = op;
++ backend_nb->svc_mon_src_ip = svc_mon_src_ip;
+
+- backend_nb->sbrec_monitor = mon_info->sbrec_mon;
+- mon_info->required = true;
++ if (!lb_vip_nb->lb_health_check || !op || !svc_mon_src_ip) {
++ continue;
++ }
++
++ const char *protocol = lb->nlb->protocol;
++ if (!protocol || !protocol[0]) {
++ protocol = "tcp";
++ }
++ backend_nb->health_check = true;
++ struct service_monitor_info *mon_info =
++ create_or_get_service_mon(ctx, monitor_map,
++ backend->ip_str,
++ backend_nb->op->nbsp->name,
++ backend->port,
++ protocol);
++ ovs_assert(mon_info);
++ sbrec_service_monitor_set_options(
++ mon_info->sbrec_mon, &lb_vip_nb->lb_health_check->options);
++ struct eth_addr ea;
++ if (!mon_info->sbrec_mon->src_mac ||
++ !eth_addr_from_string(mon_info->sbrec_mon->src_mac, &ea) ||
++ !eth_addr_equals(ea, svc_monitor_mac_ea)) {
++ sbrec_service_monitor_set_src_mac(mon_info->sbrec_mon,
++ svc_monitor_mac);
++ }
++
++ if (!mon_info->sbrec_mon->src_ip ||
++ strcmp(mon_info->sbrec_mon->src_ip,
++ backend_nb->svc_mon_src_ip)) {
++ sbrec_service_monitor_set_src_ip(
++ mon_info->sbrec_mon,
++ backend_nb->svc_mon_src_ip);
+ }
++
++ backend_nb->sbrec_monitor = mon_info->sbrec_mon;
++ mon_info->required = true;
+ }
+ }
}
static
@@ -10424,7 +11647,7 @@ index 5a3227568..e78a71728 100644
if (lb_vip_nb->lb_health_check) {
ds_put_cstr(action, "ct_lb(backends=");
-@@ -3463,18 +3581,30 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip,
+@@ -3463,18 +3669,30 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip,
}
if (!n_active_backends) {
@@ -10459,7 +11682,46 @@ index 5a3227568..e78a71728 100644
ds_chomp(action, ';');
ds_chomp(action, ')');
ds_put_format(action, "; hash_fields=\"%s\");", selection_fields);
-@@ -3547,10 +3677,18 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+@@ -3483,32 +3701,17 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip,
+
+ static void
+ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+- struct hmap *ports, struct hmap *lbs)
++ struct hmap *lbs)
+ {
+- hmap_init(lbs);
+- struct hmap monitor_map = HMAP_INITIALIZER(&monitor_map);
++ struct ovn_northd_lb *lb;
+
+- const struct sbrec_service_monitor *sbrec_mon;
+- SBREC_SERVICE_MONITOR_FOR_EACH (sbrec_mon, ctx->ovnsb_idl) {
+- uint32_t hash = sbrec_mon->port;
+- hash = hash_string(sbrec_mon->ip, hash);
+- hash = hash_string(sbrec_mon->logical_port, hash);
+- struct service_monitor_info *mon_info = xzalloc(sizeof *mon_info);
+- mon_info->sbrec_mon = sbrec_mon;
+- mon_info->required = false;
+- hmap_insert(&monitor_map, &mon_info->hmap_node, hash);
+- }
++ hmap_init(lbs);
+
+ const struct nbrec_load_balancer *nbrec_lb;
+ NBREC_LOAD_BALANCER_FOR_EACH (nbrec_lb, ctx->ovnnb_idl) {
+- struct ovn_northd_lb *lb =
+- ovn_northd_lb_create(nbrec_lb, ports, (void *)ovn_port_find);
+- hmap_insert(lbs, &lb->hmap_node, uuid_hash(&nbrec_lb->header_.uuid));
+- }
+-
+- struct ovn_northd_lb *lb;
+- HMAP_FOR_EACH (lb, hmap_node, lbs) {
+- ovn_lb_svc_create(ctx, lb, &monitor_map);
++ struct ovn_northd_lb *lb_nb = ovn_northd_lb_create(nbrec_lb);
++ hmap_insert(lbs, &lb_nb->hmap_node,
++ uuid_hash(&nbrec_lb->header_.uuid));
+ }
+
+ struct ovn_datapath *od;
+@@ -3547,10 +3750,18 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
/* Create SB Load balancer records if not present and sync
* the SB load balancer columns. */
HMAP_FOR_EACH (lb, hmap_node, lbs) {
@@ -10478,7 +11740,7 @@ index 5a3227568..e78a71728 100644
if (!lb->slb) {
sbrec_lb = sbrec_load_balancer_insert(ctx->ovnsb_txn);
lb->slb = sbrec_lb;
-@@ -3564,9 +3702,11 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+@@ -3564,9 +3775,11 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
sbrec_load_balancer_set_name(lb->slb, lb->nlb->name);
sbrec_load_balancer_set_vips(lb->slb, &lb->nlb->vips);
sbrec_load_balancer_set_protocol(lb->slb, lb->nlb->protocol);
@@ -10490,15 +11752,85 @@ index 5a3227568..e78a71728 100644
}
/* Set the list of associated load balanacers to a logical switch
-@@ -4821,27 +4961,38 @@ ovn_ls_port_group_destroy(struct hmap *nb_pgs)
- hmap_destroy(nb_pgs);
- }
-
--static bool
--has_stateful_acl(struct ovn_datapath *od)
+@@ -3590,6 +3803,29 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+ od->nbs->n_load_balancer);
+ free(sbrec_lbs);
+ }
++}
++
+static void
-+ls_get_acl_flags(struct ovn_datapath *od)
- {
++build_ovn_lb_svcs(struct northd_context *ctx, struct hmap *ports,
++ struct hmap *lbs)
++{
++ struct hmap monitor_map = HMAP_INITIALIZER(&monitor_map);
++
++ const struct sbrec_service_monitor *sbrec_mon;
++ SBREC_SERVICE_MONITOR_FOR_EACH (sbrec_mon, ctx->ovnsb_idl) {
++ uint32_t hash = sbrec_mon->port;
++ hash = hash_string(sbrec_mon->ip, hash);
++ hash = hash_string(sbrec_mon->logical_port, hash);
++ struct service_monitor_info *mon_info = xzalloc(sizeof *mon_info);
++ mon_info->sbrec_mon = sbrec_mon;
++ mon_info->required = false;
++ hmap_insert(&monitor_map, &mon_info->hmap_node, hash);
++ }
++
++ struct ovn_northd_lb *lb;
++ HMAP_FOR_EACH (lb, hmap_node, lbs) {
++ ovn_lb_svc_create(ctx, lb, &monitor_map, ports);
++ }
+
+ struct service_monitor_info *mon_info;
+ HMAP_FOR_EACH_POP (mon_info, hmap_node, &monitor_map) {
+@@ -3602,6 +3838,39 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+ hmap_destroy(&monitor_map);
+ }
+
++static void
++build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
++{
++ struct ovn_datapath *od;
++
++ HMAP_FOR_EACH (od, key_node, datapaths) {
++ if (!od->nbr) {
++ continue;
++ }
++
++ for (size_t i = 0; i < od->nbr->n_load_balancer; i++) {
++ struct ovn_northd_lb *lb =
++ ovn_northd_lb_find(lbs,
++ &od->nbr->load_balancer[i]->header_.uuid);
++ const char *ip_address;
++ bool is_routable = smap_get_bool(&lb->nlb->options, "add_route",
++ false);
++ SSET_FOR_EACH (ip_address, &lb->ips_v4) {
++ sset_add(&od->lb_ips_v4, ip_address);
++ if (is_routable) {
++ sset_add(&od->lb_ips_v4_routable, ip_address);
++ }
++ }
++ SSET_FOR_EACH (ip_address, &lb->ips_v6) {
++ sset_add(&od->lb_ips_v6, ip_address);
++ if (is_routable) {
++ sset_add(&od->lb_ips_v6_routable, ip_address);
++ }
++ }
++ }
++ }
++}
++
+ static bool
+ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
+ {
+@@ -4821,27 +5090,38 @@ ovn_ls_port_group_destroy(struct hmap *nb_pgs)
+ hmap_destroy(nb_pgs);
+ }
+
+-static bool
+-has_stateful_acl(struct ovn_datapath *od)
++static void
++ls_get_acl_flags(struct ovn_datapath *od)
+ {
- for (size_t i = 0; i < od->nbs->n_acls; i++) {
- struct nbrec_acl *acl = od->nbs->acls[i];
- if (!strcmp(acl->action, "allow-related")) {
@@ -10541,7 +11873,7 @@ index 5a3227568..e78a71728 100644
}
/* Logical switch ingress table 0: Ingress port security - L2
-@@ -4905,50 +5056,82 @@ build_lswitch_input_port_sec_od(
+@@ -4905,50 +5185,82 @@ build_lswitch_input_port_sec_od(
}
static void
@@ -10554,17 +11886,7 @@ index 5a3227568..e78a71728 100644
- struct ds actions = DS_EMPTY_INITIALIZER;
- struct ds match = DS_EMPTY_INITIALIZER;
- struct ovn_port *op;
-+ if (op->nbsp && !op->n_ps_addrs && !strcmp(op->nbsp->type, "") &&
-+ op->has_unknown) {
-+ ds_clear(match);
-+ ds_clear(actions);
-+ ds_put_format(match, "inport == %s", op->json_key);
-+ ds_put_format(actions, REGBIT_LKUP_FDB
-+ " = lookup_fdb(inport, eth.src); next;");
-+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_LOOKUP_FDB, 100,
-+ ds_cstr(match), ds_cstr(actions),
-+ &op->nbsp->header_);
-
+-
- /* Egress table 8: Egress port security - IP (priorities 90 and 80)
- * if port security enabled.
- *
@@ -10579,6 +11901,19 @@ index 5a3227568..e78a71728 100644
- if (!op->nbsp || lsp_is_external(op->nbsp)) {
- continue;
- }
++ if (op->nbsp && !op->n_ps_addrs && !strcmp(op->nbsp->type, "") &&
++ op->has_unknown) {
++ ds_clear(match);
++ ds_clear(actions);
++ ds_put_format(match, "inport == %s", op->json_key);
++ ds_put_format(actions, REGBIT_LKUP_FDB
++ " = lookup_fdb(inport, eth.src); next;");
++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_LOOKUP_FDB, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbsp->header_);
+
+- ds_clear(&actions);
+- ds_clear(&match);
+ ds_put_cstr(match, " && "REGBIT_LKUP_FDB" == 0");
+ ds_clear(actions);
+ ds_put_cstr(actions, "put_fdb(inport, eth.src); next;");
@@ -10587,9 +11922,7 @@ index 5a3227568..e78a71728 100644
+ &op->nbsp->header_);
+ }
+}
-
-- ds_clear(&actions);
-- ds_clear(&match);
++
+static void
+build_lswitch_learn_fdb_od(
+ struct ovn_datapath *od, struct hmap *lflows)
@@ -10619,11 +11952,11 @@ index 5a3227568..e78a71728 100644
+{
+
+ if (op->nbsp && (!lsp_is_external(op->nbsp))) {
-+
-+ ds_clear(actions);
-+ ds_clear(match);
- ds_put_format(&match, "outport == %s", op->json_key);
++ ds_clear(actions);
++ ds_clear(match);
++
+ ds_put_format(match, "outport == %s", op->json_key);
if (lsp_is_enabled(op->nbsp)) {
build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
@@ -10651,7 +11984,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
}
-@@ -4956,23 +5139,20 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
+@@ -4956,23 +5268,20 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
build_port_security_ip(P_OUT, op, lflows, &op->nbsp->header_);
}
}
@@ -10684,7 +12017,7 @@ index 5a3227568..e78a71728 100644
}
static void
-@@ -5008,8 +5188,6 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
+@@ -5008,8 +5317,6 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
static void
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
{
@@ -10693,7 +12026,7 @@ index 5a3227568..e78a71728 100644
/* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
* allowed by default. */
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
-@@ -5024,7 +5202,7 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
+@@ -5024,7 +5331,7 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
/* If there are any stateful ACL rules in this datapath, we must
* send all IP packets through the conntrack action, which handles
* defragmentation, in order to match L4 headers. */
@@ -10702,7 +12035,7 @@ index 5a3227568..e78a71728 100644
for (size_t i = 0; i < od->n_router_ports; i++) {
skip_port_from_conntrack(od, od->router_ports[i],
S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
-@@ -5084,7 +5262,10 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5084,7 +5391,10 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
struct nbrec_load_balancer *lb,
int pl, struct shash *meter_groups)
{
@@ -10714,7 +12047,7 @@ index 5a3227568..e78a71728 100644
return;
}
-@@ -5124,7 +5305,7 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5124,7 +5434,7 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
}
static bool
@@ -10723,7 +12056,7 @@ index 5a3227568..e78a71728 100644
{
for (int i = 0; i < od->nbs->n_load_balancer; i++) {
struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
-@@ -5190,8 +5371,8 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5190,8 +5500,8 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
vip_configured = (vip_configured || lb->n_vips);
}
@@ -10734,7 +12067,7 @@ index 5a3227568..e78a71728 100644
*
* Send all the packets to conntrack in the ingress pipeline if the
* logical switch has a load balancer with VIP configured. Earlier
-@@ -5221,9 +5402,9 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5221,9 +5531,9 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
*/
if (vip_configured) {
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
@@ -10746,7 +12079,7 @@ index 5a3227568..e78a71728 100644
}
}
-@@ -5235,10 +5416,46 @@ build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
+@@ -5235,10 +5545,46 @@ build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
@@ -10793,7 +12126,7 @@ index 5a3227568..e78a71728 100644
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
}
-@@ -5267,6 +5484,17 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+@@ -5267,6 +5613,17 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
for (size_t i = 0; i < ARRAY_SIZE(stages); i++) {
enum ovn_stage stage = stages[i];
@@ -10811,7 +12144,7 @@ index 5a3227568..e78a71728 100644
/* New, not already established connections, may hit either allow
* or drop ACLs. For allow ACLs, the connection must also be committed
* to conntrack so we set REGBIT_ACL_HINT_ALLOW_NEW.
-@@ -5327,9 +5555,6 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+@@ -5327,9 +5684,6 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
ovn_lflow_add(lflows, od, stage, 1, "ct.est && ct_label.blocked == 0",
REGBIT_ACL_HINT_BLOCK " = 1; "
"next;");
@@ -10821,7 +12154,7 @@ index 5a3227568..e78a71728 100644
}
}
-@@ -5661,13 +5886,22 @@ static void
+@@ -5661,13 +6015,22 @@ static void
build_acls(struct ovn_datapath *od, struct hmap *lflows,
struct hmap *port_groups, const struct shash *meter_groups)
{
@@ -10848,7 +12181,7 @@ index 5a3227568..e78a71728 100644
if (has_stateful) {
/* Ingress and Egress ACL Table (Priority 1).
-@@ -5698,21 +5932,23 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5698,21 +6061,23 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
"ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
REGBIT_CONNTRACK_COMMIT" = 1; next;");
@@ -10880,7 +12213,7 @@ index 5a3227568..e78a71728 100644
*
* Allow reply traffic that is part of an established
* conntrack entry that has not been marked for deletion
-@@ -5721,14 +5957,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5721,14 +6086,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
* direction to hit the currently defined policy from ACLs.
*
* This is enforced at a higher priority than ACLs can be defined. */
@@ -10904,7 +12237,7 @@ index 5a3227568..e78a71728 100644
/* Ingress and Egress ACL Table (Priority 65535).
*
-@@ -5741,21 +5978,21 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5741,21 +6107,21 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
* a dynamically negotiated FTP data channel), but will allow
* related traffic such as an ICMP Port Unreachable through
* that's generated from a non-listening UDP port. */
@@ -10937,7 +12270,7 @@ index 5a3227568..e78a71728 100644
"nd || nd_ra || nd_rs || mldv1 || mldv2", "next;");
}
-@@ -5842,15 +6079,18 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5842,15 +6208,18 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
actions);
}
@@ -10964,7 +12297,7 @@ index 5a3227568..e78a71728 100644
}
static void
-@@ -5914,37 +6154,6 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) {
+@@ -5914,37 +6283,6 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) {
}
}
@@ -11002,7 +12335,7 @@ index 5a3227568..e78a71728 100644
static void
build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
struct ovn_northd_lb *lb)
-@@ -5953,11 +6162,20 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5953,11 +6291,20 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
struct ovn_lb_vip *lb_vip = &lb->vips[i];
struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i];
@@ -11023,7 +12356,7 @@ index 5a3227568..e78a71728 100644
}
const char *proto = NULL;
-@@ -5970,12 +6188,17 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
+@@ -5970,12 +6317,17 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
proto = "sctp";
}
}
@@ -11044,7 +12377,7 @@ index 5a3227568..e78a71728 100644
struct ds match = DS_EMPTY_INITIALIZER;
ds_put_format(&match, "ct.new && %s.dst == %s", ip_match,
-@@ -6015,18 +6238,6 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
+@@ -6015,18 +6367,6 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
REGBIT_CONNTRACK_COMMIT" == 1",
"ct_commit { ct_label.blocked = 0; }; next;");
@@ -11063,7 +12396,7 @@ index 5a3227568..e78a71728 100644
/* Load balancing rules for new connections get committed to conntrack
* table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
* a higher priority rule for load balancing below also commits the
-@@ -6051,40 +6262,50 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows)
+@@ -6051,40 +6391,50 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows)
ovn_lflow_add(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 0, "1", "next;");
@@ -11134,7 +12467,263 @@ index 5a3227568..e78a71728 100644
}
}
-@@ -6754,9 +6975,7 @@ is_vlan_transparent(const struct ovn_datapath *od)
+@@ -6371,44 +6721,41 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
+ ds_destroy(&match);
+ }
+
+-/*
+- * Ingress table 19: Flows that forward ARP/ND requests only to the routers
+- * that own the addresses. Other ARP/ND packets are still flooded in the
+- * switching domain as regular broadcast.
+- */
+ static void
+-build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips,
+- int addr_family,
+- struct ovn_port *patch_op,
+- struct ovn_datapath *od,
+- uint32_t priority,
+- struct hmap *lflows,
+- const struct ovsdb_idl_row *stage_hint)
++arp_nd_ns_match(struct ds *ips, int addr_family, struct ds *match)
+ {
+- struct ds match = DS_EMPTY_INITIALIZER;
+- struct ds actions = DS_EMPTY_INITIALIZER;
+-
+ /* Packets received from VXLAN tunnels have already been through the
+ * router pipeline so we should skip them. Normally this is done by the
+ * multicast_group implementation (VXLAN packets skip table 32 which
+ * delivers to patch ports) but we're bypassing multicast_groups.
+ */
+- ds_put_cstr(&match, FLAGBIT_NOT_VXLAN " && ");
++ ds_put_cstr(match, FLAGBIT_NOT_VXLAN " && ");
+
+ if (addr_family == AF_INET) {
+- ds_put_cstr(&match, "arp.op == 1 && arp.tpa == { ");
++ ds_put_cstr(match, "arp.op == 1 && arp.tpa == {");
+ } else {
+- ds_put_cstr(&match, "nd_ns && nd.target == { ");
++ ds_put_cstr(match, "nd_ns && nd.target == {");
+ }
+
+- const char *ip_address;
+- SSET_FOR_EACH (ip_address, ips) {
+- ds_put_format(&match, "%s, ", ip_address);
+- }
++ ds_put_cstr(match, ds_cstr_ro(ips));
++ ds_put_cstr(match, "}");
++}
++
++/*
++ * Ingress table 19: Flows that forward ARP/ND requests only to the routers
++ * that own the addresses. Other ARP/ND packets are still flooded in the
++ * switching domain as regular broadcast.
++ */
++static void
++build_lswitch_rport_arp_req_flow_for_reachable_ip(struct ds *ips,
++ int addr_family, struct ovn_port *patch_op, struct ovn_datapath *od,
++ uint32_t priority, struct hmap *lflows,
++ const struct ovsdb_idl_row *stage_hint)
++{
++ struct ds match = DS_EMPTY_INITIALIZER;
++ struct ds actions = DS_EMPTY_INITIALIZER;
+
+- ds_chomp(&match, ' ');
+- ds_chomp(&match, ',');
+- ds_put_cstr(&match, "}");
++ arp_nd_ns_match(ips, addr_family, &match);
+
+ /* Send a the packet to the router pipeline. If the switch has non-router
+ * ports then flood it there as well.
+@@ -6431,6 +6778,30 @@ build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips,
+ ds_destroy(&actions);
+ }
+
++/*
++ * Ingress table 19: Flows that forward ARP/ND requests for "unreachable" IPs
++ * (NAT or load balancer IPs configured on a router that are outside the
++ * router's configured subnets).
++ * These ARP/ND packets are flooded in the switching domain as regular
++ * broadcast.
++ */
++static void
++build_lswitch_rport_arp_req_flow_for_unreachable_ip(struct ds *ips,
++ int addr_family, struct ovn_datapath *od, uint32_t priority,
++ struct hmap *lflows, const struct ovsdb_idl_row *stage_hint)
++{
++ struct ds match = DS_EMPTY_INITIALIZER;
++
++ arp_nd_ns_match(ips, addr_family, &match);
++
++ ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP,
++ priority, ds_cstr(&match),
++ "outport = \""MC_FLOOD"\"; output;",
++ stage_hint);
++
++ ds_destroy(&match);
++}
++
+ /*
+ * Ingress table 19: Flows that forward ARP/ND requests only to the routers
+ * that own the addresses.
+@@ -6457,38 +6828,39 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
+ * router port.
+ * Priority: 80.
+ */
+- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
+- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+-
+- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
++ struct ds ips_v4_match_reachable = DS_EMPTY_INITIALIZER;
++ struct ds ips_v6_match_reachable = DS_EMPTY_INITIALIZER;
++ struct ds ips_v4_match_unreachable = DS_EMPTY_INITIALIZER;
++ struct ds ips_v6_match_unreachable = DS_EMPTY_INITIALIZER;
+
+ const char *ip_addr;
+- const char *ip_addr_next;
+- SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) {
++ SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v4) {
+ ovs_be32 ipv4_addr;
+
+ /* Check if the ovn port has a network configured on which we could
+ * expect ARP requests for the LB VIP.
+ */
+- if (ip_parse(ip_addr, &ipv4_addr) &&
+- lrouter_port_ipv4_reachable(op, ipv4_addr)) {
+- continue;
++ if (ip_parse(ip_addr, &ipv4_addr)) {
++ if (lrouter_port_ipv4_reachable(op, ipv4_addr)) {
++ ds_put_format(&ips_v4_match_reachable, "%s, ", ip_addr);
++ } else {
++ ds_put_format(&ips_v4_match_unreachable, "%s, ", ip_addr);
++ }
+ }
+-
+- sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr));
+ }
+- SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) {
++ SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v6) {
+ struct in6_addr ipv6_addr;
+
+ /* Check if the ovn port has a network configured on which we could
+ * expect NS requests for the LB VIP.
+ */
+- if (ipv6_parse(ip_addr, &ipv6_addr) &&
+- lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
+- continue;
++ if (ipv6_parse(ip_addr, &ipv6_addr)) {
++ if (lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
++ ds_put_format(&ips_v6_match_reachable, "%s, ", ip_addr);
++ } else {
++ ds_put_format(&ips_v6_match_unreachable, "%s, ", ip_addr);
++ }
+ }
+-
+- sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr));
+ }
+
+ for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+@@ -6509,39 +6881,67 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
+ if (nat_entry_is_v6(nat_entry)) {
+ struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
+
+- if (lrouter_port_ipv6_reachable(op, addr)) {
+- sset_add(&all_ips_v6, nat->external_ip);
++ if (!sset_contains(&op->od->lb_ips_v6, nat->external_ip)) {
++ if (lrouter_port_ipv6_reachable(op, addr)) {
++ ds_put_format(&ips_v6_match_reachable, "%s, ",
++ nat->external_ip);
++ } else {
++ ds_put_format(&ips_v6_match_unreachable, "%s, ",
++ nat->external_ip);
++ }
+ }
+ } else {
+ ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
+-
+- if (lrouter_port_ipv4_reachable(op, addr)) {
+- sset_add(&all_ips_v4, nat->external_ip);
++ if (!sset_contains(&op->od->lb_ips_v4, nat->external_ip)) {
++ if (lrouter_port_ipv4_reachable(op, addr)) {
++ ds_put_format(&ips_v4_match_reachable, "%s, ",
++ nat->external_ip);
++ } else {
++ ds_put_format(&ips_v4_match_unreachable, "%s, ",
++ nat->external_ip);
++ }
+ }
+ }
+ }
+
+ for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
++ ds_put_format(&ips_v4_match_reachable, "%s, ",
++ op->lrp_networks.ipv4_addrs[i].addr_s);
+ }
+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
+- }
+-
+- if (!sset_is_empty(&all_ips_v4)) {
+- build_lswitch_rport_arp_req_flow_for_ip(&all_ips_v4, AF_INET, sw_op,
+- sw_od, 80, lflows,
+- stage_hint);
++ ds_put_format(&ips_v6_match_reachable, "%s, ",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ }
++
++ if (ds_last(&ips_v4_match_reachable) != EOF) {
++ ds_chomp(&ips_v4_match_reachable, ' ');
++ ds_chomp(&ips_v4_match_reachable, ',');
++ build_lswitch_rport_arp_req_flow_for_reachable_ip(
++ &ips_v4_match_reachable, AF_INET, sw_op, sw_od, 80, lflows,
++ stage_hint);
++ }
++ if (ds_last(&ips_v6_match_reachable) != EOF) {
++ ds_chomp(&ips_v6_match_reachable, ' ');
++ ds_chomp(&ips_v6_match_reachable, ',');
++ build_lswitch_rport_arp_req_flow_for_reachable_ip(
++ &ips_v6_match_reachable, AF_INET6, sw_op, sw_od, 80, lflows,
++ stage_hint);
++ }
++
++ if (ds_last(&ips_v4_match_unreachable) != EOF) {
++ ds_chomp(&ips_v4_match_unreachable, ' ');
++ ds_chomp(&ips_v4_match_unreachable, ',');
++ build_lswitch_rport_arp_req_flow_for_unreachable_ip(
++ &ips_v4_match_unreachable, AF_INET, sw_od, 90, lflows,
++ stage_hint);
++ }
++ if (ds_last(&ips_v6_match_unreachable) != EOF) {
++ ds_chomp(&ips_v6_match_unreachable, ' ');
++ ds_chomp(&ips_v6_match_unreachable, ',');
++ build_lswitch_rport_arp_req_flow_for_unreachable_ip(
++ &ips_v6_match_unreachable, AF_INET6, sw_od, 90, lflows,
++ stage_hint);
+ }
+- if (!sset_is_empty(&all_ips_v6)) {
+- build_lswitch_rport_arp_req_flow_for_ip(&all_ips_v6, AF_INET6, sw_op,
+- sw_od, 80, lflows,
+- stage_hint);
+- }
+-
+- sset_destroy(&all_ips_v4);
+- sset_destroy(&all_ips_v6);
+-
+ /* Self originated ARP requests/ND need to be flooded as usual.
+ *
+ * However, if the switch doesn't have any non-router ports we shouldn't
+@@ -6552,6 +6952,10 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
+ if (sw_od->n_router_ports != sw_od->nbs->n_ports) {
+ build_lswitch_rport_arp_req_self_orig_flow(op, 75, sw_od, lflows);
+ }
++ ds_destroy(&ips_v4_match_reachable);
++ ds_destroy(&ips_v6_match_reachable);
++ ds_destroy(&ips_v4_match_unreachable);
++ ds_destroy(&ips_v6_match_unreachable);
+ }
+
+ static void
+@@ -6754,9 +7158,7 @@ is_vlan_transparent(const struct ovn_datapath *od)
}
static void
@@ -11145,7 +12734,7 @@ index 5a3227568..e78a71728 100644
{
/* This flow table structure is documented in ovn-northd(8), so please
* update ovn-northd.8.xml if you change anything. */
-@@ -6765,49 +6984,127 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6765,32 +7167,110 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
struct ds actions = DS_EMPTY_INITIALIZER;
struct ovn_datapath *od;
@@ -11161,13 +12750,6 @@ index 5a3227568..e78a71728 100644
continue;
}
-- if ((!strcmp(op->nbsp->type, "localnet")) ||
-- (!strcmp(op->nbsp->type, "vtep"))) {
-- ds_clear(&match);
-- ds_put_format(&match, "inport == %s", op->json_key);
-- ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP,
-- 100, ds_cstr(&match), "next;",
-- &op->nbsp->header_);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
+ "outport = get_fdb(eth.dst); next;");
+
@@ -11178,38 +12760,15 @@ index 5a3227568..e78a71728 100644
+ } else {
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 50,
+ "outport == \"none\"", "drop;");
- }
++ }
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 0, "1",
+ "output;");
- }
-
-- /* Ingress table 13: ARP/ND responder, reply for known IPs.
-- * (priority 50). */
-- HMAP_FOR_EACH (op, key_node, ports) {
-- if (!op->nbsp) {
-- continue;
-- }
++ }
++
+ ds_destroy(&match);
+ ds_destroy(&actions);
+}
-
-- if (!strcmp(op->nbsp->type, "virtual")) {
-- /* Handle
-- * - GARPs for virtual ip which belongs to a logical port
-- * of type 'virtual' and bind that port.
-- *
-- * - ARP reply from the virtual ip which belongs to a logical
-- * port of type 'virtual' and bind that port.
-- * */
-- ovs_be32 ip;
-- const char *virtual_ip = smap_get(&op->nbsp->options,
-- "virtual-ip");
-- const char *virtual_parents = smap_get(&op->nbsp->options,
-- "virtual-parents");
-- if (!virtual_ip || !virtual_parents ||
-- !ip_parse(virtual_ip, &ip)) {
-- continue;
-- }
++
+/* Build pre-ACL and ACL tables for both ingress and egress.
+ * Ingress tables 3 through 10. Egress tables 0 through 7. */
+static void
@@ -11268,17 +12827,27 @@ index 5a3227568..e78a71728 100644
+ struct ds *match)
+{
+ if (op->nbsp) {
-+ if ((!strcmp(op->nbsp->type, "localnet")) ||
-+ (!strcmp(op->nbsp->type, "vtep"))) {
+ if ((!strcmp(op->nbsp->type, "localnet")) ||
+ (!strcmp(op->nbsp->type, "vtep"))) {
+- ds_clear(&match);
+- ds_put_format(&match, "inport == %s", op->json_key);
+ ds_clear(match);
+ ds_put_format(match, "inport == %s", op->json_key);
-+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP,
+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP,
+- 100, ds_cstr(&match), "next;",
+ 100, ds_cstr(match), "next;",
-+ &op->nbsp->header_);
-+ }
-+ }
+ &op->nbsp->header_);
+ }
+ }
+}
-+
+
+- /* Ingress table 13: ARP/ND responder, reply for known IPs.
+- * (priority 50). */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp) {
+- continue;
+- }
+-
+/* Ingress table 13: ARP/ND responder, reply for known IPs.
+ * (priority 50). */
+static void
@@ -11289,27 +12858,19 @@ index 5a3227568..e78a71728 100644
+ struct ds *match)
+{
+ if (op->nbsp) {
-+ if (!strcmp(op->nbsp->type, "virtual")) {
-+ /* Handle
-+ * - GARPs for virtual ip which belongs to a logical port
-+ * of type 'virtual' and bind that port.
-+ *
-+ * - ARP reply from the virtual ip which belongs to a logical
-+ * port of type 'virtual' and bind that port.
-+ * */
-+ ovs_be32 ip;
-+ const char *virtual_ip = smap_get(&op->nbsp->options,
-+ "virtual-ip");
-+ const char *virtual_parents = smap_get(&op->nbsp->options,
-+ "virtual-parents");
-+ if (!virtual_ip || !virtual_parents ||
-+ !ip_parse(virtual_ip, &ip)) {
+ if (!strcmp(op->nbsp->type, "virtual")) {
+ /* Handle
+ * - GARPs for virtual ip which belongs to a logical port
+@@ -6806,7 +7286,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ "virtual-parents");
+ if (!virtual_ip || !virtual_parents ||
+ !ip_parse(virtual_ip, &ip)) {
+- continue;
+ return;
-+ }
+ }
char *tokstr = xstrdup(virtual_parents);
- char *save_ptr = NULL;
-@@ -6821,21 +7118,21 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6821,21 +7301,21 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
continue;
}
@@ -11336,7 +12897,7 @@ index 5a3227568..e78a71728 100644
&vp->nbsp->header_);
}
-@@ -6850,20 +7147,20 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6850,20 +7330,20 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
if (check_lsp_is_up &&
!lsp_is_up(op->nbsp) && !lsp_is_router(op->nbsp) &&
strcmp(op->nbsp->type, "localport")) {
@@ -11363,7 +12924,7 @@ index 5a3227568..e78a71728 100644
"eth.dst = eth.src; "
"eth.src = %s; "
"arp.op = 2; /* ARP reply */ "
-@@ -6878,8 +7175,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6878,8 +7358,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
op->lsp_addrs[i].ipv4_addrs[j].addr_s);
ovn_lflow_add_with_hint(lflows, op->od,
S_SWITCH_IN_ARP_ND_RSP, 50,
@@ -11374,7 +12935,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
/* Do not reply to an ARP request from the port that owns
-@@ -6894,10 +7191,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6894,10 +7374,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
* address is intended to detect situations where the
* network is not working as configured, so dropping the
* request would frustrate that intent.) */
@@ -11387,7 +12948,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
}
-@@ -6905,15 +7202,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6905,15 +7385,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
* unicast IPv6 address and its all-nodes multicast address,
* but always respond with the unicast IPv6 address. */
for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
@@ -11407,7 +12968,7 @@ index 5a3227568..e78a71728 100644
"%s { "
"eth.src = %s; "
"ip6.src = %s; "
-@@ -6930,93 +7227,99 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -6930,93 +7410,99 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
op->lsp_addrs[i].ea_s);
ovn_lflow_add_with_hint(lflows, op->od,
S_SWITCH_IN_ARP_ND_RSP, 50,
@@ -11564,7 +13125,7 @@ index 5a3227568..e78a71728 100644
}
bool is_external = lsp_is_external(op->nbsp);
-@@ -7024,7 +7327,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7024,7 +7510,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
!op->nbsp->ha_chassis_group)) {
/* If it's an external port and there are no localnet ports
* and if it doesn't belong to an HA chassis group ignore it. */
@@ -11573,7 +13134,7 @@ index 5a3227568..e78a71728 100644
}
for (size_t i = 0; i < op->n_lsp_addrs; i++) {
-@@ -7047,14 +7350,35 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7047,14 +7533,35 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
}
}
}
@@ -11616,7 +13177,7 @@ index 5a3227568..e78a71728 100644
ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
"udp.dst == 53",
-@@ -7071,47 +7395,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7071,47 +7578,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
dns_match, dns_action);
}
@@ -11683,7 +13244,7 @@ index 5a3227568..e78a71728 100644
ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 110,
"eth.dst == $svc_monitor_mac",
-@@ -7120,22 +7430,22 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7120,22 +7613,22 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
if (mcast_sw_info->enabled) {
@@ -11711,7 +13272,7 @@ index 5a3227568..e78a71728 100644
/* Flood all IP multicast traffic destined to 224.0.0.X to all
* ports - RFC 4541, section 2.1.2, item 2.
-@@ -7157,10 +7467,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7157,10 +7650,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
* handled by the L2 multicast flow.
*/
if (!mcast_sw_info->flood_unregistered) {
@@ -11724,7 +13285,7 @@ index 5a3227568..e78a71728 100644
"clone { "
"outport = \""MC_MROUTER_FLOOD"\"; "
"output; "
-@@ -7168,7 +7478,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7168,7 +7661,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
}
if (mcast_sw_info->flood_static) {
@@ -11733,7 +13294,7 @@ index 5a3227568..e78a71728 100644
}
/* Explicitly drop the traffic if relay or static flooding
-@@ -7176,30 +7486,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7176,30 +7669,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
*/
if (!mcast_sw_info->flood_relay &&
!mcast_sw_info->flood_static) {
@@ -11778,7 +13339,7 @@ index 5a3227568..e78a71728 100644
struct mcast_switch_info *mcast_sw_info =
&igmp_group->datapath->mcast_info.sw;
-@@ -7211,57 +7524,62 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7211,57 +7707,62 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
ovs_be32 group_address =
in6_addr_get_mapped_ipv4(&igmp_group->address);
if (ip_is_local_multicast(group_address)) {
@@ -11856,7 +13417,7 @@ index 5a3227568..e78a71728 100644
/* For ports connected to logical routers add flows to bypass the
* broadcast flooding of ARP/ND requests in table 19. We direct the
-@@ -7279,15 +7597,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7279,15 +7780,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
struct eth_addr mac;
if (ovs_scan(op->nbsp->addresses[i],
ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
@@ -11878,7 +13439,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
} else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
if (lsp_is_enabled(op->nbsp)) {
-@@ -7300,15 +7618,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7300,15 +7801,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
continue;
}
@@ -11900,7 +13461,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
} else if (!strcmp(op->nbsp->addresses[i], "router")) {
if (!op->peer || !op->peer->nbrp
-@@ -7316,8 +7634,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7316,8 +7817,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
continue;
}
@@ -11911,7 +13472,7 @@ index 5a3227568..e78a71728 100644
ETH_ADDR_ARGS(mac));
if (op->peer->od->l3dgw_port
&& op->peer->od->l3redirect_port
-@@ -7343,16 +7661,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7343,16 +7844,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
}
if (add_chassis_resident_check) {
@@ -11932,7 +13493,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
/* Add ethernet addresses specified in NAT rules on
-@@ -7366,19 +7684,19 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7366,19 +7867,19 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
&& nat->logical_port && nat->external_mac
&& eth_addr_from_string(nat->external_mac, &mac)) {
@@ -11958,7 +13519,7 @@ index 5a3227568..e78a71728 100644
&op->nbsp->header_);
}
}
-@@ -7392,71 +7710,202 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -7392,94 +7893,225 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
}
}
}
@@ -11978,20 +13539,20 @@ index 5a3227568..e78a71728 100644
- }
- }
+ const struct sbrec_bfd *sb_bt;
-
-- build_lswitch_output_port_sec(ports, datapaths, lflows);
++
+ bool ref;
+};
-- ds_destroy(&match);
-- ds_destroy(&actions);
+- build_lswitch_output_port_sec(ports, datapaths, lflows);
+static struct bfd_entry *
+bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
+ const char *dst_ip)
+{
+ struct bfd_entry *bfd_e;
+ uint32_t hash;
-+
+
+- ds_destroy(&match);
+- ds_destroy(&actions);
+ hash = hash_string(dst_ip, 0);
+ hash = hash_string(logical_port, hash);
+ HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
@@ -12094,9 +13655,9 @@ index 5a3227568..e78a71728 100644
+ int min_rx = nb_bt->n_min_rx ? nb_bt->min_rx[0] : BFD_DEF_MINRX;
+ if (min_rx != sb_bt->min_rx) {
+ sbrec_bfd_set_min_rx(sb_bt, min_rx);
-+ }
-+}
-+
+ }
+ }
+
+/* RFC 5881 section 4
+ * The source port MUST be in the range 49152 through 65535.
+ * The same UDP source port number MUST be used for all BFD
@@ -12106,20 +13667,36 @@ index 5a3227568..e78a71728 100644
+ */
+#define BFD_UDP_SRC_PORT_START 49152
+#define BFD_UDP_SRC_PORT_LEN (65535 - BFD_UDP_SRC_PORT_START)
-+
+
+-/* Returns a string of the IP address of the router port 'op' that
+- * overlaps with 'ip_s". If one is not found, returns NULL.
+- *
+- * The caller must not free the returned string. */
+-static const char *
+-find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
+static int bfd_get_unused_port(unsigned long *bfd_src_ports)
-+{
+ {
+- bool is_ipv4 = strchr(ip_s, '.') ? true : false;
+ int port;
-+
+
+- if (is_ipv4) {
+- ovs_be32 ip;
+ port = bitmap_scan(bfd_src_ports, 0, 0, BFD_UDP_SRC_PORT_LEN);
+ if (port == BFD_UDP_SRC_PORT_LEN) {
+ return -ENOSPC;
- }
++ }
+ bitmap_set1(bfd_src_ports, port);
-+
+
+- if (!ip_parse(ip_s, &ip)) {
+- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
+- return NULL;
+- }
+ return port + BFD_UDP_SRC_PORT_START;
- }
++}
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
+static void
+build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections,
+ struct hmap *ports)
@@ -12129,7 +13706,8 @@ index 5a3227568..e78a71728 100644
+ unsigned long *bfd_src_ports;
+ struct bfd_entry *bfd_e;
+ uint32_t hash;
-+
+
+- if (!((na->network ^ ip) & na->mask)) {
+ bfd_src_ports = bitmap_allocate(BFD_UDP_SRC_PORT_LEN);
+
+ SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
@@ -12205,10 +13783,33 @@ index 5a3227568..e78a71728 100644
+
+ bitmap_free(bfd_src_ports);
+}
-
- /* Returns a string of the IP address of the router port 'op' that
- * overlaps with 'ip_s". If one is not found, returns NULL.
-@@ -7549,33 +7998,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
++
++/* Returns a string of the IP address of the router port 'op' that
++ * overlaps with 'ip_s". If one is not found, returns NULL.
++ *
++ * The caller must not free the returned string. */
++static const char *
++find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
++{
++ bool is_ipv4 = strchr(ip_s, '.') ? true : false;
++
++ if (is_ipv4) {
++ ovs_be32 ip;
++
++ if (!ip_parse(ip_s, &ip)) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
++ return NULL;
++ }
++
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
++
++ if (!((na->network ^ ip) & na->mask)) {
+ /* There should be only 1 interface that matches the
+ * supplied IP. Otherwise, it's a configuration error,
+ * because subnets of a router's interfaces should NOT
+@@ -7549,33 +8181,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
struct ds actions = DS_EMPTY_INITIALIZER;
if (!strcmp(rule->action, "reroute")) {
@@ -12253,7 +13854,7 @@ index 5a3227568..e78a71728 100644
is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
lrp_addr_s,
out_port->lrp_networks.ea_s,
-@@ -7588,7 +8043,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
+@@ -7588,7 +8226,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
if (pkt_mark) {
ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
}
@@ -12262,7 +13863,7 @@ index 5a3227568..e78a71728 100644
}
ds_put_format(&match, "%s", rule->match);
-@@ -7598,6 +8053,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
+@@ -7598,6 +8236,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
ds_destroy(&actions);
}
@@ -12370,7 +13971,7 @@ index 5a3227568..e78a71728 100644
struct parsed_route {
struct ovs_list list_node;
struct in6_addr prefix;
-@@ -7619,7 +8175,8 @@ route_hash(struct parsed_route *route)
+@@ -7619,7 +8358,8 @@ route_hash(struct parsed_route *route)
* Otherwise return NULL. */
static struct parsed_route *
parsed_routes_add(struct ovs_list *routes,
@@ -12380,7 +13981,7 @@ index 5a3227568..e78a71728 100644
{
/* Verify that the next hop is an IP address with an all-ones mask. */
struct in6_addr nexthop;
-@@ -7660,6 +8217,25 @@ parsed_routes_add(struct ovs_list *routes,
+@@ -7660,6 +8400,25 @@ parsed_routes_add(struct ovs_list *routes,
return NULL;
}
@@ -12406,7 +14007,16 @@ index 5a3227568..e78a71728 100644
struct parsed_route *pr = xzalloc(sizeof *pr);
pr->prefix = prefix;
pr->plen = plen;
-@@ -8102,16 +8678,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
+@@ -7978,7 +8737,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
+ out_port->lrp_networks.ea_s,
+ IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
+ port_ip, out_port->json_key);
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 100,
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
+ ds_cstr(&match), ds_cstr(&actions),
+ &st_route->header_);
+
+@@ -8102,16 +8861,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
&match, &priority);
@@ -12428,7 +14038,7 @@ index 5a3227568..e78a71728 100644
"%s = %s; "
"eth.src = %s; "
"outport = %s; "
-@@ -8121,11 +8696,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
+@@ -8121,11 +8879,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
lrp_addr_s,
op->lrp_networks.ea_s,
op->json_key);
@@ -12449,7 +14059,7 @@ index 5a3227568..e78a71728 100644
ds_destroy(&actions);
}
-@@ -8203,25 +8787,26 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
+@@ -8203,25 +8970,26 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
return false;
}
@@ -12483,7 +14093,7 @@ index 5a3227568..e78a71728 100644
const char *proto, struct nbrec_load_balancer *lb,
struct shash *meter_groups, struct sset *nat_entries)
{
-@@ -8230,9 +8815,10 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+@@ -8230,9 +8998,10 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
/* A match and actions for new connections. */
char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
@@ -12497,7 +14107,7 @@ index 5a3227568..e78a71728 100644
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority,
new_match, new_actions, &lb->header_);
free(new_actions);
-@@ -8243,11 +8829,12 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+@@ -8243,11 +9012,12 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
/* A match and actions for established connections. */
char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
@@ -12514,7 +14124,7 @@ index 5a3227568..e78a71728 100644
} else {
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority,
est_match, "ct_dnat;", &lb->header_);
-@@ -8320,11 +8907,13 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+@@ -8320,11 +9090,13 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
ds_put_format(&undnat_match, ") && outport == %s && "
"is_chassis_resident(%s)", od->l3dgw_port->json_key,
od->l3redirect_port->json_key);
@@ -12531,7 +14141,7 @@ index 5a3227568..e78a71728 100644
} else {
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
ds_cstr(&undnat_match), "ct_dnat;",
-@@ -8334,6 +8923,105 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+@@ -8334,6 +9106,105 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
ds_destroy(&undnat_match);
}
@@ -12637,7 +14247,7 @@ index 5a3227568..e78a71728 100644
#define ND_RA_MAX_INTERVAL_MAX 1800
#define ND_RA_MAX_INTERVAL_MIN 4
-@@ -8538,14 +9226,12 @@ build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
+@@ -8538,14 +9409,12 @@ build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
"arp.op = 2; /* ARP reply */ "
"arp.tha = arp.sha; "
"arp.sha = %s; "
@@ -12654,7 +14264,7 @@ index 5a3227568..e78a71728 100644
}
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority,
-@@ -8788,2375 +9474,2514 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
+@@ -8788,2375 +9657,2549 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
}
static void
@@ -12667,28 +14277,23 @@ index 5a3227568..e78a71728 100644
{
- /* This flow table structure is documented in ovn-northd(8), so please
- * update ovn-northd.8.xml if you change anything. */
-+ if (!op->nbrp || !op->peer || !op->od->lb_force_snat_router_ip) {
-+ return;
-+ }
-
+-
- struct ds match = DS_EMPTY_INITIALIZER;
- struct ds actions = DS_EMPTY_INITIALIZER;
-+ if (op->lrp_networks.n_ipv4_addrs) {
-+ ds_clear(match);
-+ ds_clear(actions);
-
+-
- struct ovn_datapath *od;
- struct ovn_port *op;
-+ ds_put_format(match, "inport == %s && ip4.dst == %s",
-+ op->json_key, op->lrp_networks.ipv4_addrs[0].addr_s);
-+ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_UNSNAT, 110,
-+ ds_cstr(match), "ct_snat;");
++ if (!op->nbrp || !op->peer || !op->od->lb_force_snat_router_ip) {
++ return;
++ }
- HMAP_FOR_EACH (od, key_node, datapaths) {
- if (!od->nbr) {
- continue;
- }
++ if (op->lrp_networks.n_ipv4_addrs) {
+ ds_clear(match);
++ ds_clear(actions);
- /* Priority-90-92 flows handle ARP requests and ND packets. Most are
- * per logical port but DNAT addresses can be handled per datapath
@@ -12700,6 +14305,25 @@ index 5a3227568..e78a71728 100644
- */
- for (int i = 0; i < od->nbr->n_nat; i++) {
- struct ovn_nat *nat_entry = &od->nat_entries[i];
++ ds_put_format(match, "inport == %s && ip4.dst == %s",
++ op->json_key, op->lrp_networks.ipv4_addrs[0].addr_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_UNSNAT, 110,
++ ds_cstr(match), "ct_snat;");
+
+- /* Skip entries we failed to parse. */
+- if (!nat_entry_is_valid(nat_entry)) {
+- continue;
+- }
++ ds_clear(match);
+
+- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+- * below.
+- */
+- if (!strcmp(nat_entry->nb->type, "snat")) {
+- continue;
+- }
+- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+- }
+ /* Higher priority rules to force SNAT with the router port ip.
+ * This only takes effect when the packet has already been
+ * load balanced once. */
@@ -12725,24 +14349,24 @@ index 5a3227568..e78a71728 100644
+ ds_clear(match);
+ ds_clear(actions);
-- /* Skip entries we failed to parse. */
-- if (!nat_entry_is_valid(nat_entry)) {
-- continue;
-- }
+- /* Now handle SNAT entries too, one per unique SNAT IP. */
+- struct shash_node *snat_snode;
+- SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
+- struct ovn_snat_ip *snat_ip = snat_snode->data;
+ ds_put_format(match, "inport == %s && ip6.dst == %s",
+ op->json_key, op->lrp_networks.ipv6_addrs[0].addr_s);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_UNSNAT, 110,
+ ds_cstr(match), "ct_snat;");
-- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
-- * below.
-- */
-- if (!strcmp(nat_entry->nb->type, "snat")) {
+- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
- continue;
- }
-- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+ ds_clear(match);
-+
+
+- struct ovn_nat *nat_entry =
+- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+- struct ovn_nat, ext_addr_list_node);
+- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+ /* Higher priority rules to force SNAT with the router port ip.
+ * This only takes effect when the packet has already been
+ * load balanced once. */
@@ -12760,13 +14384,14 @@ index 5a3227568..e78a71728 100644
+ "balancer", op->json_key,
+ op->lrp_networks.ipv6_addrs[0].addr_s);
}
-+ }
+ }
+}
-- /* Now handle SNAT entries too, one per unique SNAT IP. */
-- struct shash_node *snat_snode;
-- SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
-- struct ovn_snat_ip *snat_ip = snat_snode->data;
+- /* Logical router ingress table 3: IP Input for IPv4. */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbrp) {
+- continue;
+- }
+static void
+build_lrouter_bfd_flows(struct hmap *lflows, struct ovn_port *op)
+{
@@ -12774,17 +14399,25 @@ index 5a3227568..e78a71728 100644
+ return;
+ }
-- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
-- continue;
-- }
+- if (op->derived) {
+- /* No ingress packets are accepted on a chassisredirect
+- * port, so no need to program flows for that port. */
+- continue;
+- }
+ struct ds ip_list = DS_EMPTY_INITIALIZER;
+ struct ds match = DS_EMPTY_INITIALIZER;
-- struct ovn_nat *nat_entry =
-- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
-- struct ovn_nat, ext_addr_list_node);
-- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
-- }
+- if (op->lrp_networks.n_ipv4_addrs) {
+- /* L3 admission control: drop packets that originate from an
+- * IPv4 address owned by the router or a broadcast address
+- * known to the router (priority 100). */
+- ds_clear(&match);
+- ds_put_cstr(&match, "ip4.src == ");
+- op_put_v4_networks(&match, op, true);
+- ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+- ds_cstr(&match), "drop;",
+- &op->nbrp->header_);
+ if (op->lrp_networks.n_ipv4_addrs) {
+ op_put_v4_networks(&ip_list, op, false);
+ ds_put_format(&match, "ip4.src == %s && udp.dst == 3784",
@@ -12798,15 +14431,20 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110,
+ ds_cstr(&match), "handle_bfd_msg(); ",
+ &op->nbrp->header_);
- }
++ }
+ if (op->lrp_networks.n_ipv6_addrs) {
+ ds_clear(&ip_list);
+ ds_clear(&match);
-- /* Logical router ingress table 3: IP Input for IPv4. */
-- HMAP_FOR_EACH (op, key_node, ports) {
-- if (!op->nbrp) {
-- continue;
+- /* ICMP echo reply. These flows reply to ICMP echo requests
+- * received for the router's IP address. Since packets only
+- * get here as part of the logical router datapath, the inport
+- * (i.e. the incoming locally attached net) does not matter.
+- * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
+- ds_clear(&match);
+- ds_put_cstr(&match, "ip4.dst == ");
+- op_put_v4_networks(&match, op, false);
+- ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
+ op_put_v6_networks(&ip_list, op);
+ ds_put_format(&match, "ip6.src == %s && udp.dst == 3784",
+ ds_cstr(&ip_list));
@@ -12820,11 +14458,24 @@ index 5a3227568..e78a71728 100644
+ ds_cstr(&match), "handle_bfd_msg(); ",
+ &op->nbrp->header_);
+ }
-+
+
+- const char * icmp_actions = "ip4.dst <-> ip4.src; "
+- "ip.ttl = 255; "
+- "icmp4.type = 0; "
+- "flags.loopback = 1; "
+- "next; ";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+- ds_cstr(&match), icmp_actions,
+- &op->nbrp->header_);
+- }
+ ds_destroy(&ip_list);
+ ds_destroy(&match);
+}
-+
+
+- /* ICMP time exceeded */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- ds_clear(&match);
+- ds_clear(&actions);
+/* Logical router ingress Table 0: L2 Admission Control
+ * Generic admission control flows (without inport check).
+ */
@@ -12839,7 +14490,23 @@ index 5a3227568..e78a71728 100644
+ "vlan.present || eth.src[40]", "drop;");
+ }
+}
-+
+
+- ds_put_format(&match,
+- "inport == %s && ip4 && "
+- "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
+- ds_put_format(&actions,
+- "icmp4 {"
+- "eth.dst <-> eth.src; "
+- "icmp4.type = 11; /* Time exceeded */ "
+- "icmp4.code = 0; /* TTL exceeded in transit */ "
+- "ip4.dst = ip4.src; "
+- "ip4.src = %s; "
+- "ip.ttl = 255; "
+- "next; };",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+- ds_cstr(&match), ds_cstr(&actions),
+- &op->nbrp->header_);
+/* Logical router ingress Table 0: L2 Admission Control
+ * This table drops packets that the router shouldn’t see at all based
+ * on their Ethernet headers.
@@ -12856,26 +14523,71 @@ index 5a3227568..e78a71728 100644
+ return;
}
- if (op->derived) {
-- /* No ingress packets are accepted on a chassisredirect
-- * port, so no need to program flows for that port. */
-- continue;
+- /* ARP reply. These flows reply to ARP requests for the router's own
+- * IP address. */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- ds_clear(&match);
+- ds_put_format(&match, "arp.spa == %s/%u",
+- op->lrp_networks.ipv4_addrs[i].network_s,
+- op->lrp_networks.ipv4_addrs[i].plen);
+-
+- if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
+- && op->peer->od->n_localnet_ports) {
+- bool add_chassis_resident_check = false;
+- if (op == op->od->l3dgw_port) {
+- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+- * should only be sent from the gateway chassis, so that
+- * upstream MAC learning points to the gateway chassis.
+- * Also need to avoid generation of multiple ARP responses
+- * from different chassis. */
+- add_chassis_resident_check = true;
+- } else {
+- /* Check if the option 'reside-on-redirect-chassis'
+- * is set to true on the router port. If set to true
+- * and if peer's logical switch has a localnet port, it
+- * means the router pipeline for the packets from
+- * peer's logical switch is be run on the chassis
+- * hosting the gateway port and it should reply to the
+- * ARP requests for the router port IPs.
+- */
+- add_chassis_resident_check = smap_get_bool(
+- &op->nbrp->options,
+- "reside-on-redirect-chassis", false);
+- }
+-
+- if (add_chassis_resident_check) {
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
+- }
+-
+- build_lrouter_arp_flow(op->od, op,
+- op->lrp_networks.ipv4_addrs[i].addr_s,
+- REG_INPORT_ETH_ADDR, &match, false, 90,
+- &op->nbrp->header_, lflows);
++ if (op->derived) {
+ /* No ingress packets should be received on a chassisredirect
+ * port. */
+ return;
}
-- if (op->lrp_networks.n_ipv4_addrs) {
-- /* L3 admission control: drop packets that originate from an
-- * IPv4 address owned by the router or a broadcast address
-- * known to the router (priority 100). */
+- /* A set to hold all load-balancer vips that need ARP responses. */
+- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
+- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+-
+- const char *ip_address;
+- SSET_FOR_EACH (ip_address, &all_ips_v4) {
- ds_clear(&match);
-- ds_put_cstr(&match, "ip4.src == ");
-- op_put_v4_networks(&match, op, true);
-- ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
-- ds_cstr(&match), "drop;",
-- &op->nbrp->header_);
+- if (op == op->od->l3dgw_port) {
+- ds_put_format(&match, "is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
+-
+- build_lrouter_arp_flow(op->od, op,
+- ip_address, REG_INPORT_ETH_ADDR,
+- &match, false, 90, NULL, lflows);
+- }
+ /* Store the ethernet address of the port receiving the packet.
+ * This will save us from having to match on inport further down in
+ * the pipeline.
@@ -12884,29 +14596,21 @@ index 5a3227568..e78a71728 100644
+ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
+ op->lrp_networks.ea_s);
-- /* ICMP echo reply. These flows reply to ICMP echo requests
-- * received for the router's IP address. Since packets only
-- * get here as part of the logical router datapath, the inport
-- * (i.e. the incoming locally attached net) does not matter.
-- * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
+- SSET_FOR_EACH (ip_address, &all_ips_v6) {
- ds_clear(&match);
-- ds_put_cstr(&match, "ip4.dst == ");
-- op_put_v4_networks(&match, op, false);
-- ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
+- if (op == op->od->l3dgw_port) {
+- ds_put_format(&match, "is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
+ ds_clear(match);
+ ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
-- const char * icmp_actions = "ip4.dst <-> ip4.src; "
-- "ip.ttl = 255; "
-- "icmp4.type = 0; "
-- "flags.loopback = 1; "
-- "next; ";
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
-- ds_cstr(&match), icmp_actions,
-- &op->nbrp->header_);
+- build_lrouter_nd_flow(op->od, op, "nd_na",
+- ip_address, NULL, REG_INPORT_ETH_ADDR,
+- &match, false, 90, NULL, lflows);
+ ds_clear(match);
+ ds_put_format(match, "eth.dst == %s && inport == %s",
+ op->lrp_networks.ea_s, op->json_key);
@@ -12923,46 +14627,76 @@ index 5a3227568..e78a71728 100644
+ }
+}
-- /* ICMP time exceeded */
-- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-- ds_clear(&match);
-- ds_clear(&actions);
-
-- ds_put_format(&match,
-- "inport == %s && ip4 && "
-- "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
-- ds_put_format(&actions,
-- "icmp4 {"
-- "eth.dst <-> eth.src; "
-- "icmp4.type = 11; /* Time exceeded */ "
-- "icmp4.code = 0; /* TTL exceeded in transit */ "
-- "ip4.dst = ip4.src; "
-- "ip4.src = %s; "
-- "ip.ttl = 255; "
-- "next; };",
-- op->lrp_networks.ipv4_addrs[i].addr_s);
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
-- ds_cstr(&match), ds_cstr(&actions),
-- &op->nbrp->header_);
-- }
-+/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
-+ * lflows for logical routers. */
-+static void
-+build_neigh_learning_flows_for_lrouter(
-+ struct ovn_datapath *od, struct hmap *lflows,
-+ struct ds *match, struct ds *actions)
-+{
-+ if (od->nbr) {
-
-- /* ARP reply. These flows reply to ARP requests for the router's own
-- * IP address. */
-- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-- ds_clear(&match);
-- ds_put_format(&match, "arp.spa == %s/%u",
-- op->lrp_networks.ipv4_addrs[i].network_s,
-- op->lrp_networks.ipv4_addrs[i].plen);
+- sset_destroy(&all_ips_v4);
+- sset_destroy(&all_ips_v6);
+-
+- if (!smap_get(&op->od->nbr->options, "chassis")
+- && !op->od->l3dgw_port) {
+- /* UDP/TCP port unreachable. */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- ds_clear(&match);
+- ds_put_format(&match,
+- "ip4 && ip4.dst == %s && !ip.later_frag && udp",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- const char *action = "icmp4 {"
+- "eth.dst <-> eth.src; "
+- "ip4.dst <-> ip4.src; "
+- "ip.ttl = 255; "
+- "icmp4.type = 3; "
+- "icmp4.code = 3; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 80, ds_cstr(&match), action,
+- &op->nbrp->header_);
+-
+- ds_clear(&match);
+- ds_put_format(&match,
+- "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- action = "tcp_reset {"
+- "eth.dst <-> eth.src; "
+- "ip4.dst <-> ip4.src; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 80, ds_cstr(&match), action,
+- &op->nbrp->header_);
+
+- ds_clear(&match);
+- ds_put_format(&match,
+- "ip4 && ip4.dst == %s && !ip.later_frag",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- action = "icmp4 {"
+- "eth.dst <-> eth.src; "
+- "ip4.dst <-> ip4.src; "
+- "ip.ttl = 255; "
+- "icmp4.type = 3; "
+- "icmp4.code = 2; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 70, ds_cstr(&match), action,
+- &op->nbrp->header_);
+- }
+- }
++/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
++ * lflows for logical routers. */
++static void
++build_neigh_learning_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (od->nbr) {
+
+- /* Drop IP traffic destined to router owned IPs except if the IP is
+- * also a SNAT IP. Those are dropped later, in stage
+- * "lr_in_arp_resolve", if unSNAT was unsuccessful.
+ /* Learn MAC bindings from ARP/IPv6 ND.
-+ *
+ *
+- * Priority 60.
+- */
+- build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
+- lflows);
+-
+- /* ARP / ND handling for external IP addresses.
+ * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
+ * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
+ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
@@ -12971,14 +14705,23 @@ index 5a3227568..e78a71728 100644
+ * "lookup_arp_ip" action for ARP request packets, and stores the
+ * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
+ * to "1" directly for ARP response packets.
-+ *
+ *
+- * DNAT and SNAT IP addresses are external IP addresses that need ARP
+- * handling.
+ * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
+ * for the (nd.target, nd.tll) in the mac binding table using the
+ * 'lookup_nd' action and stores the result in
+ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+ * "always_learn_from_arp_request" is set to false,
+ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
-+ *
+ *
+- * These are already taken care globally, per router. The only
+- * exception is on the l3dgw_port where we might need to use a
+- * different ETH address.
+- */
+- if (op != op->od->l3dgw_port) {
+- continue;
+- }
+ * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
+ * for the (ip6.src, nd.sll) in the mac binding table using the
+ * 'lookup_nd' action and stores the result in
@@ -12995,29 +14738,8 @@ index 5a3227568..e78a71728 100644
+ *
+ * */
-- if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
-- && op->peer->od->n_localnet_ports) {
-- bool add_chassis_resident_check = false;
-- if (op == op->od->l3dgw_port) {
-- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
-- * should only be sent from the gateway chassis, so that
-- * upstream MAC learning points to the gateway chassis.
-- * Also need to avoid generation of multiple ARP responses
-- * from different chassis. */
-- add_chassis_resident_check = true;
-- } else {
-- /* Check if the option 'reside-on-redirect-chassis'
-- * is set to true on the router port. If set to true
-- * and if peer's logical switch has a localnet port, it
-- * means the router pipeline for the packets from
-- * peer's logical switch is be run on the chassis
-- * hosting the gateway port and it should reply to the
-- * ARP requests for the router port IPs.
-- */
-- add_chassis_resident_check = smap_get_bool(
-- &op->nbrp->options,
-- "reside-on-redirect-chassis", false);
-- }
+- for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+- struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+ /* Flows for LOOKUP_NEIGHBOR. */
+ bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
+ "always_learn_from_arp_request", true);
@@ -13029,10 +14751,9 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+ "arp.op == 2", ds_cstr(actions));
-- if (add_chassis_resident_check) {
-- ds_put_format(&match, " && is_chassis_resident(%s)",
-- op->od->l3redirect_port->json_key);
-- }
+- /* Skip entries we failed to parse. */
+- if (!nat_entry_is_valid(nat_entry)) {
+- continue;
- }
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
@@ -13042,11 +14763,30 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
+ ds_cstr(actions));
-- build_lrouter_arp_flow(op->od, op,
-- op->lrp_networks.ipv4_addrs[i].addr_s,
-- REG_INPORT_ETH_ADDR, &match, false, 90,
-- &op->nbrp->header_, lflows);
+- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+- * below.
+- */
+- if (!strcmp(nat_entry->nb->type, "snat")) {
+- continue;
+- }
+- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+- }
+-
+- /* Now handle SNAT entries too, one per unique SNAT IP. */
+- struct shash_node *snat_snode;
+- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
+- struct ovn_snat_ip *snat_ip = snat_snode->data;
+-
+- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+- continue;
+- }
+-
+- struct ovn_nat *nat_entry =
+- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+- struct ovn_nat, ext_addr_list_node);
+- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
- }
+- }
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
@@ -13056,22 +14796,36 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
+ ds_cstr(actions));
-- /* A set to hold all load-balancer vips that need ARP responses. */
-- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
-- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
-- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+- /* NAT, Defrag and load balancing. */
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbr) {
+- continue;
+- }
+ /* For other packet types, we can skip neighbor learning.
+ * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
+ REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
-- const char *ip_address;
-- SSET_FOR_EACH (ip_address, &all_ips_v4) {
-- ds_clear(&match);
-- if (op == op->od->l3dgw_port) {
-- ds_put_format(&match, "is_chassis_resident(%s)",
-- op->od->l3redirect_port->json_key);
-- }
+- /* Packets are allowed by default. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
+-
+- /* Send the IPv6 NS packets to next table. When ovn-controller
+- * generates IPv6 NS (for the action - nd_ns{}), the injected
+- * packet would go through conntrack - which is not required. */
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
+-
+- /* NAT rules are only valid on Gateway routers and routers with
+- * l3dgw_port (router has a port with gateway chassis
+- * specified). */
+- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+- continue;
+- }
+ /* Flows for LEARN_NEIGHBOR. */
+ /* Skip Neighbor learning if not required. */
+ ds_clear(match);
@@ -13081,52 +14835,34 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
+ ds_cstr(match), "next;");
-- build_lrouter_arp_flow(op->od, op,
-- ip_address, REG_INPORT_ETH_ADDR,
-- &match, false, 90, NULL, lflows);
-- }
+- struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+ "arp", "put_arp(inport, arp.spa, arp.sha); next;");
-- SSET_FOR_EACH (ip_address, &all_ips_v6) {
-- ds_clear(&match);
-- if (op == op->od->l3dgw_port) {
-- ds_put_format(&match, "is_chassis_resident(%s)",
-- op->od->l3redirect_port->json_key);
-- }
+- bool dnat_force_snat_ip =
+- !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
+- bool lb_force_snat_ip =
+- !lport_addresses_is_empty(&od->lb_force_snat_addrs);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+ "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
-- build_lrouter_nd_flow(op->od, op, "nd_na",
-- ip_address, NULL, REG_INPORT_ETH_ADDR,
-- &match, false, 90, NULL, lflows);
-- }
+- for (int i = 0; i < od->nbr->n_nat; i++) {
+- const struct nbrec_nat *nat;
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+ "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
+ }
-- sset_destroy(&all_ips_v4);
-- sset_destroy(&all_ips_v6);
+- nat = od->nbr->nat[i];
+}
-- if (!smap_get(&op->od->nbr->options, "chassis")
-- && !op->od->l3dgw_port) {
-- /* UDP/TCP port unreachable. */
-- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-- ds_clear(&match);
-- ds_put_format(&match,
-- "ip4 && ip4.dst == %s && !ip.later_frag && udp",
-- op->lrp_networks.ipv4_addrs[i].addr_s);
-- const char *action = "icmp4 {"
-- "eth.dst <-> eth.src; "
-- "ip4.dst <-> ip4.src; "
-- "ip.ttl = 255; "
-- "icmp4.type = 3; "
-- "icmp4.code = 3; "
-- "next; };";
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-- 80, ds_cstr(&match), action,
-- &op->nbrp->header_);
+- ovs_be32 ip, mask;
+- struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
+- bool is_v6 = false;
+- bool stateless = lrouter_nat_is_stateless(nat);
+- struct nbrec_address_set *allowed_ext_ips =
+- nat->allowed_ext_ips;
+- struct nbrec_address_set *exempted_ext_ips =
+- nat->exempted_ext_ips;
+/* Logical router ingress Table 1: Neighbor lookup lflows
+ * for logical router ports. */
+static void
@@ -13136,27 +14872,32 @@ index 5a3227568..e78a71728 100644
+{
+ if (op->nbrp) {
-- ds_clear(&match);
-- ds_put_format(&match,
-- "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
-- op->lrp_networks.ipv4_addrs[i].addr_s);
-- action = "tcp_reset {"
-- "eth.dst <-> eth.src; "
-- "ip4.dst <-> ip4.src; "
-- "next; };";
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-- 80, ds_cstr(&match), action,
-- &op->nbrp->header_);
+- if (allowed_ext_ips && exempted_ext_ips) {
+- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+- VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
+- "both allowed and exempt external ips set",
+- UUID_ARGS(&(nat->header_.uuid)));
+- continue;
+- }
+ bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
+ "always_learn_from_arp_request", true);
-- ds_clear(&match);
-- ds_put_format(&match,
-- "ip4 && ip4.dst == %s && !ip.later_frag",
-+ /* Check if we need to learn mac-binding from ARP requests. */
-+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-+ if (!learn_from_arp_request) {
-+ /* ARP request to this address should always get learned,
+- char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
+- if (error || mask != OVS_BE32_MAX) {
+- free(error);
+- error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
+- if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
+- /* Invalid for both IPv4 and IPv6 */
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad external ip %s for nat",
+- nat->external_ip);
+- free(error);
+- continue;
++ /* Check if we need to learn mac-binding from ARP requests. */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ if (!learn_from_arp_request) {
++ /* ARP request to this address should always get learned,
+ * so add a priority-110 flow to set
+ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
+ ds_clear(match);
@@ -13166,21 +14907,27 @@ index 5a3227568..e78a71728 100644
+ op->json_key,
+ op->lrp_networks.ipv4_addrs[i].network_s,
+ op->lrp_networks.ipv4_addrs[i].plen,
- op->lrp_networks.ipv4_addrs[i].addr_s);
-- action = "icmp4 {"
-- "eth.dst <-> eth.src; "
-- "ip4.dst <-> ip4.src; "
-- "ip.ttl = 255; "
-- "icmp4.type = 3; "
-- "icmp4.code = 2; "
-- "next; };";
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-- 70, ds_cstr(&match), action,
++ op->lrp_networks.ipv4_addrs[i].addr_s);
+ if (op->od->l3dgw_port && op == op->od->l3dgw_port
+ && op->od->l3redirect_port) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
-+ }
+ }
+- /* It was an invalid IPv4 address, but valid IPv6.
+- * Treat the rest of the handling of this NAT rule
+- * as IPv6. */
+- is_v6 = true;
+- }
+-
+- /* Check the validity of nat->logical_ip. 'logical_ip' can
+- * be a subnet when the type is "snat". */
+- int cidr_bits;
+- if (is_v6) {
+- error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
+- cidr_bits = ipv6_count_cidr_bits(&mask_v6);
+- } else {
+- error = ip_parse_masked(nat->logical_ip, &ip, &mask);
+- cidr_bits = ip_count_cidr_bits(mask);
+ const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_arp(inport, arp.spa, arp.sha); "
+ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
@@ -13188,8 +14935,31 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
+ ds_cstr(match), actions_s,
- &op->nbrp->header_);
++ &op->nbrp->header_);
}
+- if (!strcmp(nat->type, "snat")) {
+- if (error) {
+- /* Invalid for both IPv4 and IPv6 */
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
+- "in router "UUID_FMT"",
+- nat->logical_ip, UUID_ARGS(&od->key));
+- free(error);
+- continue;
+- }
+- } else {
+- if (error || (!is_v6 && mask != OVS_BE32_MAX)
+- || (is_v6 && memcmp(&mask_v6, &v6_exact,
+- sizeof mask_v6))) {
+- /* Invalid for both IPv4 and IPv6 */
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
+- ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
+- free(error);
+- continue;
+- }
+ ds_clear(match);
+ ds_put_format(match,
+ "inport == %s && arp.spa == %s/%u && arp.op == 1",
@@ -13200,7 +14970,7 @@ index 5a3227568..e78a71728 100644
+ && op->od->l3redirect_port) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
-+ }
+ }
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
@@ -13211,18 +14981,26 @@ index 5a3227568..e78a71728 100644
+ S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
- }
++ }
+ }
+}
-- /* Drop IP traffic destined to router owned IPs except if the IP is
-- * also a SNAT IP. Those are dropped later, in stage
-- * "lr_in_arp_resolve", if unSNAT was unsuccessful.
-- *
-- * Priority 60.
-- */
-- build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
-- lflows);
+- /* For distributed router NAT, determine whether this NAT rule
+- * satisfies the conditions for distributed NAT processing. */
+- bool distributed = false;
+- struct eth_addr mac;
+- if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
+- nat->logical_port && nat->external_mac) {
+- if (eth_addr_from_string(nat->external_mac, &mac)) {
+- distributed = true;
+- } else {
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
+- ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
+- continue;
+- }
+- }
+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
+ * Adv (RA) options and response. */
+static void
@@ -13234,31 +15012,57 @@ index 5a3227568..e78a71728 100644
+ return;
+ }
-- /* ARP / ND handling for external IP addresses.
-- *
-- * DNAT and SNAT IP addresses are external IP addresses that need ARP
-- * handling.
-- *
-- * These are already taken care globally, per router. The only
-- * exception is on the l3dgw_port where we might need to use a
-- * different ETH address.
-- */
-- if (op != op->od->l3dgw_port) {
-- continue;
-- }
+- /* Ingress UNSNAT table: It is for already established connections'
+- * reverse traffic. i.e., SNAT has already been done in egress
+- * pipeline and now the packet has entered the ingress pipeline as
+- * part of a reply. We undo the SNAT here.
+- *
+- * Undoing SNAT has to happen before DNAT processing. This is
+- * because when the packet was DNATed in ingress pipeline, it did
+- * not know about the possibility of eventual additional SNAT in
+- * egress pipeline. */
+- if (!strcmp(nat->type, "snat")
+- || !strcmp(nat->type, "dnat_and_snat")) {
+- if (!od->l3dgw_port) {
+- /* Gateway router. */
+- ds_clear(&match);
+- ds_clear(&actions);
+- ds_put_format(&match, "ip && ip%s.dst == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip);
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_cstr(&actions, "ct_snat;");
+- }
+ if (!op->lrp_networks.n_ipv6_addrs) {
+ return;
+ }
-- for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
-- struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+- 90, ds_cstr(&match),
+- ds_cstr(&actions),
+- &nat->header_);
+- } else {
+- /* Distributed router. */
+ struct smap options;
+ smap_clone(&options, &op->sb->options);
-- /* Skip entries we failed to parse. */
-- if (!nat_entry_is_valid(nat_entry)) {
-- continue;
-- }
+- /* Traffic received on l3dgw_port is subject to NAT. */
+- ds_clear(&match);
+- ds_clear(&actions);
+- ds_put_format(&match, "ip && ip%s.dst == %s"
+- " && inport == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip,
+- od->l3dgw_port->json_key);
+- if (!distributed && od->l3redirect_port) {
+- /* Flows for NAT rules that are centralized are only
+- * programmed on the gateway chassis. */
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
+- }
+ /* enable IPv6 prefix delegation */
+ bool prefix_delegation = smap_get_bool(&op->nbrp->options,
+ "prefix_delegation", false);
@@ -13268,14 +15072,12 @@ index 5a3227568..e78a71728 100644
+ smap_add(&options, "ipv6_prefix_delegation",
+ prefix_delegation ? "true" : "false");
-- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
-- * below.
-- */
-- if (!strcmp(nat_entry->nb->type, "snat")) {
-- continue;
-- }
-- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
-- }
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_cstr(&actions, "ct_snat;");
+- }
+ bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
+ "prefix", false);
+ if (!lrport_is_enabled(op->nbrp)) {
@@ -13285,23 +15087,43 @@ index 5a3227568..e78a71728 100644
+ ipv6_prefix ? "true" : "false");
+ sbrec_port_binding_set_options(op->sb, &options);
-- /* Now handle SNAT entries too, one per unique SNAT IP. */
-- struct shash_node *snat_snode;
-- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
-- struct ovn_snat_ip *snat_ip = snat_snode->data;
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+- 100,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+- }
+- }
+ smap_destroy(&options);
-- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
-- continue;
-- }
+- /* Ingress DNAT table: Packets enter the pipeline with destination
+- * IP address that needs to be DNATted from a external IP address
+- * to a logical IP address. */
+- if (!strcmp(nat->type, "dnat")
+- || !strcmp(nat->type, "dnat_and_snat")) {
+- if (!od->l3dgw_port) {
+- /* Gateway router. */
+- /* Packet when it goes from the initiator to destination.
+- * We need to set flags.loopback because the router can
+- * send the packet back through the same interface. */
+- ds_clear(&match);
+- ds_put_format(&match, "ip && ip%s.dst == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip);
+- ds_clear(&actions);
+- if (allowed_ext_ips || exempted_ext_ips) {
+- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
+- is_v6, true, mask);
+- }
+ const char *address_mode = smap_get(
+ &op->nbrp->ipv6_ra_configs, "address_mode");
-- struct ovn_nat *nat_entry =
-- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
-- struct ovn_nat, ext_addr_list_node);
-- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
-- }
+- if (dnat_force_snat_ip) {
+- /* Indicate to the future tables that a DNAT has taken
+- * place and a force SNAT needs to be done in the
+- * Egress SNAT table. */
+- ds_put_format(&actions,
+- "flags.force_snat_for_dnat = 1; ");
+- }
+ if (!address_mode) {
+ return;
+ }
@@ -13312,339 +15134,38 @@ index 5a3227568..e78a71728 100644
+ VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
+ address_mode);
+ return;
- }
++ }
-- /* NAT, Defrag and load balancing. */
-- HMAP_FOR_EACH (od, key_node, datapaths) {
-- if (!od->nbr) {
-- continue;
-- }
--
-- /* Packets are allowed by default. */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
--
-- /* Send the IPv6 NS packets to next table. When ovn-controller
-- * generates IPv6 NS (for the action - nd_ns{}), the injected
-- * packet would go through conntrack - which is not required. */
-- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
--
-- /* NAT rules are only valid on Gateway routers and routers with
-- * l3dgw_port (router has a port with gateway chassis
-- * specified). */
-- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
-- continue;
-- }
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "flags.loopback = 1; "
+- "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_format(&actions, "flags.loopback = 1; "
+- "ct_dnat(%s", nat->logical_ip);
+ if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
+ false)) {
+ copy_ra_to_sb(op, address_mode);
+ }
-- struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
-+ ds_clear(match);
-+ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
-+ op->json_key);
-+ ds_clear(actions);
-
-- bool dnat_force_snat_ip =
-- !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
-- bool lb_force_snat_ip =
-- !lport_addresses_is_empty(&od->lb_force_snat_addrs);
-+ const char *mtu_s = smap_get(
-+ &op->nbrp->ipv6_ra_configs, "mtu");
-
-- for (int i = 0; i < od->nbr->n_nat; i++) {
-- const struct nbrec_nat *nat;
-+ /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
-+ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
-
-- nat = od->nbr->nat[i];
-+ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
-+ "addr_mode = \"%s\", slla = %s",
-+ address_mode, op->lrp_networks.ea_s);
-+ if (mtu > 0) {
-+ ds_put_format(actions, ", mtu = %u", mtu);
-+ }
-
-- ovs_be32 ip, mask;
-- struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
-- bool is_v6 = false;
-- bool stateless = lrouter_nat_is_stateless(nat);
-- struct nbrec_address_set *allowed_ext_ips =
-- nat->allowed_ext_ips;
-- struct nbrec_address_set *exempted_ext_ips =
-- nat->exempted_ext_ips;
-+ const char *prf = smap_get_def(
-+ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
-+ if (strcmp(prf, "MEDIUM")) {
-+ ds_put_format(actions, ", router_preference = \"%s\"", prf);
-+ }
-
-- if (allowed_ext_ips && exempted_ext_ips) {
-- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-- VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
-- "both allowed and exempt external ips set",
-- UUID_ARGS(&(nat->header_.uuid)));
-- continue;
-- }
-+ bool add_rs_response_flow = false;
-
-- char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
-- if (error || mask != OVS_BE32_MAX) {
-- free(error);
-- error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
-- if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
-- /* Invalid for both IPv4 and IPv6 */
-- static struct vlog_rate_limit rl =
-- VLOG_RATE_LIMIT_INIT(5, 1);
-- VLOG_WARN_RL(&rl, "bad external ip %s for nat",
-- nat->external_ip);
-- free(error);
-- continue;
-- }
-- /* It was an invalid IPv4 address, but valid IPv6.
-- * Treat the rest of the handling of this NAT rule
-- * as IPv6. */
-- is_v6 = true;
-- }
--
-- /* Check the validity of nat->logical_ip. 'logical_ip' can
-- * be a subnet when the type is "snat". */
-- int cidr_bits;
-- if (is_v6) {
-- error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
-- cidr_bits = ipv6_count_cidr_bits(&mask_v6);
-- } else {
-- error = ip_parse_masked(nat->logical_ip, &ip, &mask);
-- cidr_bits = ip_count_cidr_bits(mask);
-- }
-- if (!strcmp(nat->type, "snat")) {
-- if (error) {
-- /* Invalid for both IPv4 and IPv6 */
-- static struct vlog_rate_limit rl =
-- VLOG_RATE_LIMIT_INIT(5, 1);
-- VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
-- "in router "UUID_FMT"",
-- nat->logical_ip, UUID_ARGS(&od->key));
-- free(error);
-- continue;
-- }
-- } else {
-- if (error || (!is_v6 && mask != OVS_BE32_MAX)
-- || (is_v6 && memcmp(&mask_v6, &v6_exact,
-- sizeof mask_v6))) {
-- /* Invalid for both IPv4 and IPv6 */
-- static struct vlog_rate_limit rl =
-- VLOG_RATE_LIMIT_INIT(5, 1);
-- VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
-- ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
-- free(error);
-- continue;
-- }
-- }
-+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-+ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
-+ continue;
-+ }
-
-- /* For distributed router NAT, determine whether this NAT rule
-- * satisfies the conditions for distributed NAT processing. */
-- bool distributed = false;
-- struct eth_addr mac;
-- if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
-- nat->logical_port && nat->external_mac) {
-- if (eth_addr_from_string(nat->external_mac, &mac)) {
-- distributed = true;
-- } else {
-- static struct vlog_rate_limit rl =
-- VLOG_RATE_LIMIT_INIT(5, 1);
-- VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
-- ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
-- continue;
-- }
-- }
-+ ds_put_format(actions, ", prefix = %s/%u",
-+ op->lrp_networks.ipv6_addrs[i].network_s,
-+ op->lrp_networks.ipv6_addrs[i].plen);
-
-- /* Ingress UNSNAT table: It is for already established connections'
-- * reverse traffic. i.e., SNAT has already been done in egress
-- * pipeline and now the packet has entered the ingress pipeline as
-- * part of a reply. We undo the SNAT here.
-- *
-- * Undoing SNAT has to happen before DNAT processing. This is
-- * because when the packet was DNATed in ingress pipeline, it did
-- * not know about the possibility of eventual additional SNAT in
-- * egress pipeline. */
-- if (!strcmp(nat->type, "snat")
-- || !strcmp(nat->type, "dnat_and_snat")) {
-- if (!od->l3dgw_port) {
-- /* Gateway router. */
-- ds_clear(&match);
-- ds_clear(&actions);
-- ds_put_format(&match, "ip && ip%s.dst == %s",
-- is_v6 ? "6" : "4",
-- nat->external_ip);
-- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-- ds_put_format(&actions, "ip%s.dst=%s; next;",
-- is_v6 ? "6" : "4", nat->logical_ip);
-- } else {
-- ds_put_cstr(&actions, "ct_snat;");
-- }
--
-- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
-- 90, ds_cstr(&match),
-- ds_cstr(&actions),
-- &nat->header_);
-- } else {
-- /* Distributed router. */
--
-- /* Traffic received on l3dgw_port is subject to NAT. */
-- ds_clear(&match);
-- ds_clear(&actions);
-- ds_put_format(&match, "ip && ip%s.dst == %s"
-- " && inport == %s",
-- is_v6 ? "6" : "4",
-- nat->external_ip,
-- od->l3dgw_port->json_key);
-- if (!distributed && od->l3redirect_port) {
-- /* Flows for NAT rules that are centralized are only
-- * programmed on the gateway chassis. */
-- ds_put_format(&match, " && is_chassis_resident(%s)",
-- od->l3redirect_port->json_key);
-- }
--
-- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-- ds_put_format(&actions, "ip%s.dst=%s; next;",
-- is_v6 ? "6" : "4", nat->logical_ip);
-- } else {
-- ds_put_cstr(&actions, "ct_snat;");
-- }
--
-- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
-- 100,
-- ds_cstr(&match), ds_cstr(&actions),
-- &nat->header_);
-- }
-- }
-+ add_rs_response_flow = true;
-+ }
-
-- /* Ingress DNAT table: Packets enter the pipeline with destination
-- * IP address that needs to be DNATted from a external IP address
-- * to a logical IP address. */
-- if (!strcmp(nat->type, "dnat")
-- || !strcmp(nat->type, "dnat_and_snat")) {
-- if (!od->l3dgw_port) {
-- /* Gateway router. */
-- /* Packet when it goes from the initiator to destination.
-- * We need to set flags.loopback because the router can
-- * send the packet back through the same interface. */
-- ds_clear(&match);
-- ds_put_format(&match, "ip && ip%s.dst == %s",
-- is_v6 ? "6" : "4",
-- nat->external_ip);
-- ds_clear(&actions);
-- if (allowed_ext_ips || exempted_ext_ips) {
-- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
-- is_v6, true, mask);
-- }
-+ if (add_rs_response_flow) {
-+ ds_put_cstr(actions, "); next;");
-+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
-+ 50, ds_cstr(match), ds_cstr(actions),
-+ &op->nbrp->header_);
-+ ds_clear(actions);
-+ ds_clear(match);
-+ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
-+ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
-
-- if (dnat_force_snat_ip) {
-- /* Indicate to the future tables that a DNAT has taken
-- * place and a force SNAT needs to be done in the
-- * Egress SNAT table. */
-- ds_put_format(&actions,
-- "flags.force_snat_for_dnat = 1; ");
-- }
-+ char ip6_str[INET6_ADDRSTRLEN + 1];
-+ struct in6_addr lla;
-+ in6_generate_lla(op->lrp_networks.ea, &lla);
-+ memset(ip6_str, 0, sizeof(ip6_str));
-+ ipv6_string_mapped(ip6_str, &lla);
-+ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
-+ "ip6.dst = ip6.src; ip6.src = %s; "
-+ "outport = inport; flags.loopback = 1; "
-+ "output;",
-+ op->lrp_networks.ea_s, ip6_str);
-+ ovn_lflow_add_with_hint(lflows, op->od,
-+ S_ROUTER_IN_ND_RA_RESPONSE, 50,
-+ ds_cstr(match), ds_cstr(actions),
-+ &op->nbrp->header_);
-+ }
-+}
-
-- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-- ds_put_format(&actions, "flags.loopback = 1; "
-- "ip%s.dst=%s; next;",
-- is_v6 ? "6" : "4", nat->logical_ip);
-- } else {
-- ds_put_format(&actions, "flags.loopback = 1; "
-- "ct_dnat(%s", nat->logical_ip);
-+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
-+ * responder, by default goto next. (priority 0). */
-+static void
-+build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
-+{
-+ if (od->nbr) {
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
-+ }
-+}
-
- if (nat->external_port_range[0]) {
- ds_put_format(&actions, ",%s",
- nat->external_port_range);
- }
- ds_put_format(&actions, ");");
- }
-+/* Logical router ingress table IP_ROUTING : IP Routing.
-+ *
-+ * A packet that arrives at this table is an IP packet that should be
-+ * routed to the address in 'ip[46].dst'.
-+ *
-+ * For regular routes without ECMP, table IP_ROUTING sets outport to the
-+ * correct output port, eth.src to the output port's MAC address, and
-+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
-+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
-+ * advances to the next table.
-+ *
-+ * For ECMP routes, i.e. multiple routes with same policy and prefix, table
-+ * IP_ROUTING remembers ECMP group id and selects a member id, and advances
-+ * to table IP_ROUTING_ECMP, which sets outport, eth.src and
-+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
-+ */
-+static void
-+build_ip_routing_flows_for_lrouter_port(
-+ struct ovn_port *op, struct hmap *lflows)
-+{
-+ if (op->nbrp) {
++ ds_clear(match);
++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
++ op->json_key);
++ ds_clear(actions);
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- } else {
- /* Distributed router. */
-+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-+ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
-+ op->lrp_networks.ipv4_addrs[i].network_s,
-+ op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
-+ &op->nbrp->header_);
-+ }
++ const char *mtu_s = smap_get(
++ &op->nbrp->ipv6_ra_configs, "mtu");
- /* Traffic received on l3dgw_port is subject to NAT. */
- ds_clear(&match);
@@ -13664,14 +15185,8 @@ index 5a3227568..e78a71728 100644
- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
- is_v6, true, mask);
- }
-+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-+ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
-+ op->lrp_networks.ipv6_addrs[i].network_s,
-+ op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
-+ &op->nbrp->header_);
-+ }
-+ }
-+}
++ /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
++ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.dst=%s; next;",
@@ -13684,31 +15199,24 @@ index 5a3227568..e78a71728 100644
- }
- ds_put_format(&actions, ");");
- }
-+static void
-+build_static_route_flows_for_lrouter(
-+ struct ovn_datapath *od, struct hmap *lflows,
-+ struct hmap *ports, struct hmap *bfd_connections)
-+{
-+ if (od->nbr) {
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
-+ REG_ECMP_GROUP_ID" == 0", "next;");
++ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
++ "addr_mode = \"%s\", slla = %s",
++ address_mode, op->lrp_networks.ea_s);
++ if (mtu > 0) {
++ ds_put_format(actions, ", mtu = %u", mtu);
++ }
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- }
-+ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
-+ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
-+ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
-+ struct ecmp_groups_node *group;
-+ for (int i = 0; i < od->nbr->n_static_routes; i++) {
-+ struct parsed_route *route =
-+ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i],
-+ bfd_connections);
-+ if (!route) {
-+ continue;
- }
--
+- }
++ const char *prf = smap_get_def(
++ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
++ if (strcmp(prf, "MEDIUM")) {
++ ds_put_format(actions, ", router_preference = \"%s\"", prf);
++ }
+
- /* ARP resolve for NAT IPs. */
- if (od->l3dgw_port) {
- if (!strcmp(nat->type, "snat")) {
@@ -13721,7 +15229,8 @@ index 5a3227568..e78a71728 100644
- 120, ds_cstr(&match), "next;",
- &nat->header_);
- }
--
++ bool add_rs_response_flow = false;
+
- if (!sset_contains(&nat_entries, nat->external_ip)) {
- ds_clear(&match);
- ds_put_format(
@@ -13741,16 +15250,17 @@ index 5a3227568..e78a71728 100644
- &nat->header_);
- sset_add(&nat_entries, nat->external_ip);
- }
-+ group = ecmp_groups_find(&ecmp_groups, route);
-+ if (group) {
-+ ecmp_groups_add_route(group, route);
- } else {
+- } else {
- /* Add the NAT external_ip to the nat_entries even for
- * gateway routers. This is required for adding load balancer
- * flows.*/
- sset_add(&nat_entries, nat->external_ip);
- }
--
++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
++ continue;
++ }
+
- /* Egress UNDNAT table: It is for already established connections'
- * reverse traffic. i.e., DNAT has already been done in ingress
- * pipeline and now the packet has entered the egress pipeline as
@@ -13870,14 +15380,11 @@ index 5a3227568..e78a71728 100644
- nat->external_port_range);
- }
- ds_put_format(&actions, ");");
-+ const struct parsed_route *existed_route =
-+ unique_routes_remove(&unique_routes, route);
-+ if (existed_route) {
-+ group = ecmp_groups_add(&ecmp_groups, existed_route);
-+ if (group) {
-+ ecmp_groups_add_route(group, route);
- }
--
+- }
++ ds_put_format(actions, ", prefix = %s/%u",
++ op->lrp_networks.ipv6_addrs[i].network_s,
++ op->lrp_networks.ipv6_addrs[i].plen);
+
- /* The priority here is calculated such that the
- * nat->logical_ip with the longest mask gets a higher
- * priority. */
@@ -13887,7 +15394,9 @@ index 5a3227568..e78a71728 100644
- &nat->header_);
- }
- }
--
++ add_rs_response_flow = true;
++ }
+
- /* Logical router ingress table 0:
- * For NAT on a distributed router, add rules allowing
- * ingress traffic with eth.dst matching nat->external_mac
@@ -13901,7 +15410,16 @@ index 5a3227568..e78a71728 100644
- ds_clear(&actions);
- ds_put_format(&actions, REG_INPORT_ETH_ADDR " = %s; next;",
- od->l3dgw_port->lrp_networks.ea_s);
--
++ if (add_rs_response_flow) {
++ ds_put_cstr(actions, "); next;");
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
++ 50, ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ ds_clear(actions);
++ ds_clear(match);
++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
++ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+
- ds_clear(&match);
- ds_put_format(&match,
- "eth.dst == "ETH_ADDR_FMT" && inport == %s"
@@ -13913,7 +15431,23 @@ index 5a3227568..e78a71728 100644
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- }
--
++ char ip6_str[INET6_ADDRSTRLEN + 1];
++ struct in6_addr lla;
++ in6_generate_lla(op->lrp_networks.ea, &lla);
++ memset(ip6_str, 0, sizeof(ip6_str));
++ ipv6_string_mapped(ip6_str, &lla);
++ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
++ "ip6.dst = ip6.src; ip6.src = %s; "
++ "outport = inport; flags.loopback = 1; "
++ "output;",
++ op->lrp_networks.ea_s, ip6_str);
++ ovn_lflow_add_with_hint(lflows, op->od,
++ S_ROUTER_IN_ND_RA_RESPONSE, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ }
++}
+
- /* Ingress Gateway Redirect Table: For NAT on a distributed
- * router, add flows that are specific to a NAT rule. These
- * flows indicate the presence of an applicable NAT rule that
@@ -13939,7 +15473,17 @@ index 5a3227568..e78a71728 100644
- 100, ds_cstr(&match),
- ds_cstr(&actions), &nat->header_);
- }
--
++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
++ * responder, by default goto next. (priority 0). */
++static void
++build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
++{
++ if (od->nbr) {
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
++ }
++}
+
- /* Egress Loopback table: For NAT on a distributed router.
- * If packets in the egress pipeline on the distributed
- * gateway port have ip.dst matching a NAT external IP, then
@@ -13955,11 +15499,32 @@ index 5a3227568..e78a71728 100644
- if (!distributed) {
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- } else {
+- } else {
- ds_put_format(&match, " && is_chassis_resident(\"%s\")",
- nat->logical_port);
- }
--
++/* Logical router ingress table IP_ROUTING : IP Routing.
++ *
++ * A packet that arrives at this table is an IP packet that should be
++ * routed to the address in 'ip[46].dst'.
++ *
++ * For regular routes without ECMP, table IP_ROUTING sets outport to the
++ * correct output port, eth.src to the output port's MAC address, and
++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
++ * advances to the next table.
++ *
++ * For ECMP routes, i.e. multiple routes with same policy and prefix, table
++ * IP_ROUTING remembers ECMP group id and selects a member id, and advances
++ * to table IP_ROUTING_ECMP, which sets outport, eth.src and
++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
++ */
++static void
++build_ip_routing_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *ports,struct hmap *lflows)
++{
++ if (op->nbrp) {
+
- ds_clear(&actions);
- ds_put_format(&actions,
- "clone { ct_clear; "
@@ -13975,8 +15540,13 @@ index 5a3227568..e78a71728 100644
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- }
-- }
--
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
++ op->lrp_networks.ipv4_addrs[i].network_s,
++ op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
++ &op->nbrp->header_);
+ }
+
- /* Handle force SNAT options set in the gateway router. */
- if (!od->l3dgw_port) {
- if (dnat_force_snat_ip) {
@@ -13999,9 +15569,8 @@ index 5a3227568..e78a71728 100644
- if (od->lb_force_snat_addrs.n_ipv6_addrs) {
- build_lrouter_force_snat_flows(lflows, od, "6",
- od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
-+ unique_routes_add(&unique_routes, route);
- }
- }
+- }
+- }
-
- /* For gateway router, re-circulate every packet through
- * the DNAT zone. This helps with the following.
@@ -14014,6 +15583,11 @@ index 5a3227568..e78a71728 100644
- * we can do it here, saving a future re-circulation. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
- "ip", "flags.loopback = 1; ct_dnat;");
++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
++ op->lrp_networks.ipv6_addrs[i].network_s,
++ op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
++ &op->nbrp->header_);
}
-
- /* Load balancing and packet defrag are only valid on
@@ -14021,12 +15595,12 @@ index 5a3227568..e78a71728 100644
- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
- sset_destroy(&nat_entries);
- continue;
-+ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
-+ /* add a flow in IP_ROUTING, and one flow for each member in
-+ * IP_ROUTING_ECMP. */
-+ build_ecmp_route_flow(lflows, od, ports, group);
++ } else if (lsp_is_router(op->nbsp)) {
++ struct ovn_port *peer = ovn_port_get_peer(ports, op);
++ if (!peer || !peer->nbrp || !peer->lrp_networks.n_ipv4_addrs) {
++ return;
}
--
+
- /* A set to hold all ips that need defragmentation and tracking. */
- struct sset all_ips = SSET_INITIALIZER(&all_ips);
-
@@ -14091,24 +15665,33 @@ index 5a3227568..e78a71728 100644
- proto, lb_vip->vip_port);
- prio = 120;
- }
--
++ for (int i = 0; i < op->od->n_router_ports; i++) {
++ struct ovn_port *router_port = ovn_port_get_peer(
++ ports, op->od->router_ports[i]);
++ if (!router_port || !router_port->nbrp || router_port == peer) {
++ continue;
++ }
+
- if (od->l3redirect_port) {
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
-- }
++ struct ovn_port_routable_addresses *ra = &router_port->routables;
++ for (size_t j = 0; j < ra->n_addrs; j++) {
++ struct lport_addresses *laddrs = &ra->laddrs[j];
++ for (size_t k = 0; k < laddrs->n_ipv4_addrs; k++) {
++ add_route(lflows, peer,
++ peer->lrp_networks.ipv4_addrs[0].addr_s,
++ laddrs->ipv4_addrs[k].network_s,
++ laddrs->ipv4_addrs[k].plen, NULL, false,
++ &peer->nbrp->header_);
+ }
- add_router_lb_flow(lflows, od, &match, &actions, prio,
- lb_force_snat_ip, lb_vip, proto,
- nb_lb, meter_groups, &nat_entries);
-- }
-+ const struct unique_routes_node *ur;
-+ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
-+ build_static_route_flow(lflows, od, ports, ur->route);
+ }
}
- sset_destroy(&all_ips);
- sset_destroy(&nat_entries);
-+ ecmp_groups_destroy(&ecmp_groups);
-+ unique_routes_destroy(&unique_routes);
-+ parsed_routes_destroy(&parsed_routes);
}
-
- ds_destroy(&match);
@@ -14117,15 +15700,13 @@ index 5a3227568..e78a71728 100644
-/* Logical router ingress Table 0: L2 Admission Control
- * Generic admission control flows (without inport check).
-+/* IP Multicast lookup. Here we set the output port, adjust TTL and
-+ * advance to next table (priority 500).
- */
+- */
static void
-build_adm_ctrl_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows)
-+build_mcast_lookup_flows_for_lrouter(
++build_static_route_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows,
-+ struct ds *match, struct ds *actions)
++ struct hmap *ports, struct hmap *bfd_connections)
{
if (od->nbr) {
- /* Logical VLANs not supported.
@@ -14134,6 +15715,8 @@ index 5a3227568..e78a71728 100644
- "vlan.present || eth.src[40]", "drop;");
- }
-}
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
++ REG_ECMP_GROUP_ID" == 0", "next;");
-/* Logical router ingress Table 0: L2 Admission Control
- * This table drops packets that the router shouldn’t see at all based
@@ -14148,51 +15731,49 @@ index 5a3227568..e78a71728 100644
- if (!lrport_is_enabled(op->nbrp)) {
- /* Drop packets from disabled logical ports (since logical flow
- * tables are default-drop). */
-+ /* Drop IPv6 multicast traffic that shouldn't be forwarded,
-+ * i.e., router solicitation and router advertisement.
-+ */
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
-+ "nd_rs || nd_ra", "drop;");
-+ if (!od->mcast_info.rtr.relay) {
- return;
+- return;
++ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
++ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
++ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
++ struct ecmp_groups_node *group;
++ for (int i = 0; i < od->nbr->n_static_routes; i++) {
++ struct parsed_route *route =
++ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i],
++ bfd_connections);
++ if (!route) {
++ continue;
++ }
++ group = ecmp_groups_find(&ecmp_groups, route);
++ if (group) {
++ ecmp_groups_add_route(group, route);
++ } else {
++ const struct parsed_route *existed_route =
++ unique_routes_remove(&unique_routes, route);
++ if (existed_route) {
++ group = ecmp_groups_add(&ecmp_groups, existed_route);
++ if (group) {
++ ecmp_groups_add_route(group, route);
++ }
++ } else {
++ unique_routes_add(&unique_routes, route);
++ }
++ }
}
-
+-
- if (op->derived) {
- /* No ingress packets should be received on a chassisredirect
- * port. */
- return;
-+ struct ovn_igmp_group *igmp_group;
-+
-+ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
-+ ds_clear(match);
-+ ds_clear(actions);
-+ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
-+ ds_put_format(match, "ip4 && ip4.dst == %s ",
-+ igmp_group->mcgroup.name);
-+ } else {
-+ ds_put_format(match, "ip6 && ip6.dst == %s ",
-+ igmp_group->mcgroup.name);
-+ }
-+ if (od->mcast_info.rtr.flood_static) {
-+ ds_put_cstr(actions,
-+ "clone { "
-+ "outport = \""MC_STATIC"\"; "
-+ "ip.ttl--; "
-+ "next; "
-+ "};");
-+ }
-+ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
-+ igmp_group->mcgroup.name);
-+ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
-+ ds_cstr(match), ds_cstr(actions));
++ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
++ /* add a flow in IP_ROUTING, and one flow for each member in
++ * IP_ROUTING_ECMP. */
++ build_ecmp_route_flow(lflows, od, ports, group);
}
-
+-
- /* Store the ethernet address of the port receiving the packet.
- * This will save us from having to match on inport further down in
- * the pipeline.
-+ /* If needed, flood unregistered multicast on statically configured
-+ * ports. Otherwise drop any multicast traffic.
- */
+- */
- ds_clear(actions);
- ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
- op->lrp_networks.ea_s);
@@ -14212,50 +15793,32 @@ index 5a3227568..e78a71728 100644
- * should only be received on the gateway chassis. */
- ds_put_format(match, " && is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
-+ if (od->mcast_info.rtr.flood_static) {
-+ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
-+ "ip4.mcast || ip6.mcast",
-+ "clone { "
-+ "outport = \""MC_STATIC"\"; "
-+ "ip.ttl--; "
-+ "next; "
-+ "};");
-+ } else {
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
-+ "ip4.mcast || ip6.mcast", "drop;");
++ const struct unique_routes_node *ur;
++ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
++ build_static_route_flow(lflows, od, ports, ur->route);
}
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
++ ecmp_groups_destroy(&ecmp_groups);
++ unique_routes_destroy(&unique_routes);
++ parsed_routes_destroy(&parsed_routes);
}
}
-
-/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
- * lflows for logical routers. */
-+/* Logical router ingress table POLICY: Policy.
-+ *
-+ * A packet that arrives at this table is an IP packet that should be
-+ * permitted/denied/rerouted to the address in the rule's nexthop.
-+ * This table sets outport to the correct out_port,
-+ * eth.src to the output port's MAC address,
-+ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
-+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
-+ * advances to the next table for ARP/ND resolution. */
++/* IP Multicast lookup. Here we set the output port, adjust TTL and
++ * advance to next table (priority 500).
++ */
static void
-build_neigh_learning_flows_for_lrouter(
-+build_ingress_policy_flows_for_lrouter(
++build_mcast_lookup_flows_for_lrouter(
struct ovn_datapath *od, struct hmap *lflows,
-- struct ds *match, struct ds *actions)
-+ struct hmap *ports)
+ struct ds *match, struct ds *actions)
{
if (od->nbr) {
-+ /* This is a catch-all rule. It has the lowest priority (0)
-+ * does a match-all("1") and pass-through (next) */
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
-+ REG_ECMP_GROUP_ID" = 0; next;");
-+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
-+ REG_ECMP_GROUP_ID" == 0", "next;");
- /* Learn MAC bindings from ARP/IPv6 ND.
- *
@@ -14290,7 +15853,15 @@ index 5a3227568..e78a71728 100644
- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
- *
- * */
--
++ /* Drop IPv6 multicast traffic that shouldn't be forwarded,
++ * i.e., router solicitation and router advertisement.
++ */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
++ "nd_rs || nd_ra", "drop;");
++ if (!od->mcast_info.rtr.relay) {
++ return;
++ }
+
- /* Flows for LOOKUP_NEIGHBOR. */
- bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
- "always_learn_from_arp_request", true);
@@ -14301,7 +15872,8 @@ index 5a3227568..e78a71728 100644
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
- "arp.op == 2", ds_cstr(actions));
--
++ struct ovn_igmp_group *igmp_group;
+
- ds_clear(actions);
- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_nd(inport, nd.target, nd.tll); %snext;",
@@ -14309,7 +15881,30 @@ index 5a3227568..e78a71728 100644
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
- ds_cstr(actions));
--
++ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
++ ds_clear(match);
++ ds_clear(actions);
++ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
++ ds_put_format(match, "ip4 && ip4.dst == %s ",
++ igmp_group->mcgroup.name);
++ } else {
++ ds_put_format(match, "ip6 && ip6.dst == %s ",
++ igmp_group->mcgroup.name);
++ }
++ if (od->mcast_info.rtr.flood_static) {
++ ds_put_cstr(actions,
++ "clone { "
++ "outport = \""MC_STATIC"\"; "
++ "ip.ttl--; "
++ "next; "
++ "};");
++ }
++ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
++ igmp_group->mcgroup.name);
++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
++ ds_cstr(match), ds_cstr(actions));
++ }
+
- ds_clear(actions);
- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
@@ -14318,18 +15913,49 @@ index 5a3227568..e78a71728 100644
- " = lookup_nd_ip(inport, ip6.src); ");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
- ds_cstr(actions));
--
++ /* If needed, flood unregistered multicast on statically configured
++ * ports. Otherwise drop any multicast traffic.
++ */
++ if (od->mcast_info.rtr.flood_static) {
++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
++ "ip4.mcast || ip6.mcast",
++ "clone { "
++ "outport = \""MC_STATIC"\"; "
++ "ip.ttl--; "
++ "next; "
++ "};");
++ } else {
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
++ "ip4.mcast || ip6.mcast", "drop;");
++ }
++ }
++}
+
- /* For other packet types, we can skip neighbor learning.
- * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
- REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
-+ /* Convert routing policies to flows. */
-+ uint16_t ecmp_group_id = 1;
-+ for (int i = 0; i < od->nbr->n_policies; i++) {
-+ const struct nbrec_logical_router_policy *rule
-+ = od->nbr->policies[i];
-+ bool is_ecmp_reroute =
-+ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
++/* Logical router ingress table POLICY: Policy.
++ *
++ * A packet that arrives at this table is an IP packet that should be
++ * permitted/denied/rerouted to the address in the rule's nexthop.
++ * This table sets outport to the correct out_port,
++ * eth.src to the output port's MAC address,
++ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
++ * advances to the next table for ARP/ND resolution. */
++static void
++build_ingress_policy_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct hmap *ports)
++{
++ if (od->nbr) {
++ /* This is a catch-all rule. It has the lowest priority (0)
++ * does a match-all("1") and pass-through (next) */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
++ REG_ECMP_GROUP_ID" = 0; next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
++ REG_ECMP_GROUP_ID" == 0", "next;");
- /* Flows for LEARN_NEIGHBOR. */
- /* Skip Neighbor learning if not required. */
@@ -14339,6 +15965,16 @@ index 5a3227568..e78a71728 100644
- " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
- ds_cstr(match), "next;");
++ /* Convert routing policies to flows. */
++ uint16_t ecmp_group_id = 1;
++ for (int i = 0; i < od->nbr->n_policies; i++) {
++ const struct nbrec_logical_router_policy *rule
++ = od->nbr->policies[i];
++ bool is_ecmp_reroute =
++ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
+
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+- "arp", "put_arp(inport, arp.spa, arp.sha); next;");
+ if (is_ecmp_reroute) {
+ build_ecmp_routing_policy_flows(lflows, od, ports, rule,
+ ecmp_group_id);
@@ -14352,7 +15988,7 @@ index 5a3227568..e78a71728 100644
+}
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
-- "arp", "put_arp(inport, arp.spa, arp.sha); next;");
+- "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
+/* Local router ingress table ARP_RESOLVE: ARP Resolution. */
+static void
+build_arp_resolve_flows_for_lrouter(
@@ -14365,16 +16001,44 @@ index 5a3227568..e78a71728 100644
+ "ip4.mcast || ip6.mcast", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
-- "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
+- "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
+ "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
-
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
-- "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
++
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
+ "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
++ }
++}
++
++static void
++routable_addresses_to_lflows(struct hmap *lflows, struct ovn_port *router_port,
++ struct ovn_port *peer, struct ds *match,
++ struct ds *actions)
++{
++ struct ovn_port_routable_addresses *ra = &router_port->routables;
++ if (!ra->n_addrs) {
++ return;
}
--
+
++ for (size_t i = 0; i < ra->n_addrs; i++) {
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "REG_NEXT_HOP_IPV4" == {",
++ peer->json_key);
++ bool first = true;
++ for (size_t j = 0; j < ra->laddrs[i].n_ipv4_addrs; j++) {
++ if (!first) {
++ ds_put_cstr(match, ", ");
++ }
++ ds_put_cstr(match, ra->laddrs[i].ipv4_addrs[j].addr_s);
++ first = false;
++ }
++ ds_put_cstr(match, "}");
++
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;", ra->laddrs[i].ea_s);
++ ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match), ds_cstr(actions));
++ }
}
-/* Logical router ingress Table 1: Neighbor lookup lflows
@@ -14552,17 +16216,15 @@ index 5a3227568..e78a71728 100644
+ /* Get the Logical_Router_Port that the
+ * Logical_Switch_Port is connected to, as
+ * 'peer'. */
-+ const char *peer_name = smap_get(
-+ &op->od->router_ports[k]->nbsp->options,
-+ "router-port");
-+ if (!peer_name) {
++ struct ovn_port *peer = ovn_port_get_peer(
++ ports, op->od->router_ports[k]);
++ if (!peer || !peer->nbrp) {
+ continue;
+ }
- struct smap options;
- smap_clone(&options, &op->sb->options);
-+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
-+ if (!peer || !peer->nbrp) {
++ if (!find_lrp_member_ip(peer, ip_s)) {
+ continue;
+ }
@@ -14574,9 +16236,10 @@ index 5a3227568..e78a71728 100644
- }
- smap_add(&options, "ipv6_prefix_delegation",
- prefix_delegation ? "true" : "false");
-+ if (!find_lrp_member_ip(peer, ip_s)) {
-+ continue;
-+ }
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV4 " == %s",
++ peer->json_key, ip_s);
- bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
- "prefix", false);
@@ -14584,14 +16247,8 @@ index 5a3227568..e78a71728 100644
- ipv6_prefix = false;
- }
- smap_add(&options, "ipv6_prefix",
-- ipv6_prefix ? "true" : "false");
-- sbrec_port_binding_set_options(op->sb, &options);
-+ ds_clear(match);
-+ ds_put_format(match, "outport == %s && "
-+ REG_NEXT_HOP_IPV4 " == %s",
-+ peer->json_key, ip_s);
-
-- smap_destroy(&options);
+- ipv6_prefix ? "true" : "false");
+- sbrec_port_binding_set_options(op->sb, &options);
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
@@ -14602,18 +16259,22 @@ index 5a3227568..e78a71728 100644
+ }
+ }
-- const char *address_mode = smap_get(
-- &op->nbrp->ipv6_ra_configs, "address_mode");
+- smap_destroy(&options);
+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+ const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
+ for (size_t k = 0; k < op->od->n_router_ports; k++) {
+ /* Get the Logical_Router_Port that the
+ * Logical_Switch_Port is connected to, as
+ * 'peer'. */
-+ const char *peer_name = smap_get(
-+ &op->od->router_ports[k]->nbsp->options,
-+ "router-port");
-+ if (!peer_name) {
++ struct ovn_port *peer = ovn_port_get_peer(
++ ports, op->od->router_ports[k]);
++ if (!peer || !peer->nbrp) {
++ continue;
++ }
+
+- const char *address_mode = smap_get(
+- &op->nbrp->ipv6_ra_configs, "address_mode");
++ if (!find_lrp_member_ip(peer, ip_s)) {
+ continue;
+ }
@@ -14628,30 +16289,11 @@ index 5a3227568..e78a71728 100644
- address_mode);
- return;
- }
-+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
-+ if (!peer || !peer->nbrp) {
-+ continue;
-+ }
-
-- if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
-- false)) {
-- copy_ra_to_sb(op, address_mode);
-- }
-+ if (!find_lrp_member_ip(peer, ip_s)) {
-+ continue;
-+ }
-
-- ds_clear(match);
-- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
-- op->json_key);
-- ds_clear(actions);
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV6 " == %s",
+ peer->json_key, ip_s);
-
-- const char *mtu_s = smap_get(
-- &op->nbrp->ipv6_ra_configs, "mtu");
++
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
@@ -14672,9 +16314,7 @@ index 5a3227568..e78a71728 100644
+ * resolved by router pipeline using the arp{} action.
+ * The MAC_Binding entry for the virtual ip might be invalid. */
+ ovs_be32 ip;
-
-- /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
-- uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
++
+ const char *vip = smap_get(&op->nbsp->options,
+ "virtual-ip");
+ const char *virtual_parents = smap_get(&op->nbsp->options,
@@ -14683,45 +16323,23 @@ index 5a3227568..e78a71728 100644
+ !ip_parse(vip, &ip) || !op->sb) {
+ return;
+ }
-
-- ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
-- "addr_mode = \"%s\", slla = %s",
-- address_mode, op->lrp_networks.ea_s);
-- if (mtu > 0) {
-- ds_put_format(actions, ", mtu = %u", mtu);
-- }
++
+ if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
+ !op->sb->chassis) {
+ /* The virtual port is not claimed yet. */
+ for (size_t i = 0; i < op->od->n_router_ports; i++) {
-+ const char *peer_name = smap_get(
-+ &op->od->router_ports[i]->nbsp->options,
-+ "router-port");
-+ if (!peer_name) {
-+ continue;
-+ }
-
-- const char *prf = smap_get_def(
-- &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
-- if (strcmp(prf, "MEDIUM")) {
-- ds_put_format(actions, ", router_preference = \"%s\"", prf);
-- }
-+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ struct ovn_port *peer = ovn_port_get_peer(
++ ports, op->od->router_ports[i]);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
-
-- bool add_rs_response_flow = false;
++
+ if (find_lrp_member_ip(peer, vip)) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 " == %s",
+ peer->json_key, vip);
-
-- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
-- continue;
-- }
++
+ const char *arp_actions =
+ "eth.dst = 00:00:00:00:00:00; next;";
+ ovn_lflow_add_with_hint(lflows, peer->od,
@@ -14738,10 +16356,7 @@ index 5a3227568..e78a71728 100644
+ if (!vp || !vp->nbsp) {
+ return;
+ }
-
-- ds_put_format(actions, ", prefix = %s/%u",
-- op->lrp_networks.ipv6_addrs[i].network_s,
-- op->lrp_networks.ipv6_addrs[i].plen);
++
+ for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
+ bool found_vip_network = false;
+ const char *ea_s = vp->lsp_addrs[i].ea_s;
@@ -14749,65 +16364,31 @@ index 5a3227568..e78a71728 100644
+ /* Get the Logical_Router_Port that the
+ * Logical_Switch_Port is connected to, as
+ * 'peer'. */
-+ const char *peer_name = smap_get(
-+ &vp->od->router_ports[j]->nbsp->options,
-+ "router-port");
-+ if (!peer_name) {
-+ continue;
-+ }
-
-- add_rs_response_flow = true;
-- }
+ struct ovn_port *peer =
-+ ovn_port_find(ports, peer_name);
++ ovn_port_get_peer(ports, vp->od->router_ports[j]);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
-- if (add_rs_response_flow) {
-- ds_put_cstr(actions, "); next;");
-- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
-- 50, ds_cstr(match), ds_cstr(actions),
-- &op->nbrp->header_);
-- ds_clear(actions);
-- ds_clear(match);
-- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
-- "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+- if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
+- false)) {
+- copy_ra_to_sb(op, address_mode);
+- }
+ if (!find_lrp_member_ip(peer, vip)) {
+ continue;
+ }
-- char ip6_str[INET6_ADDRSTRLEN + 1];
-- struct in6_addr lla;
-- in6_generate_lla(op->lrp_networks.ea, &lla);
-- memset(ip6_str, 0, sizeof(ip6_str));
-- ipv6_string_mapped(ip6_str, &lla);
-- ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
-- "ip6.dst = ip6.src; ip6.src = %s; "
-- "outport = inport; flags.loopback = 1; "
-- "output;",
-- op->lrp_networks.ea_s, ip6_str);
-- ovn_lflow_add_with_hint(lflows, op->od,
-- S_ROUTER_IN_ND_RA_RESPONSE, 50,
-- ds_cstr(match), ds_cstr(actions),
-- &op->nbrp->header_);
-- }
--}
+- ds_clear(match);
+- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
+- op->json_key);
+- ds_clear(actions);
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 " == %s",
+ peer->json_key, vip);
--/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
-- * responder, by default goto next. (priority 0). */
--static void
--build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
--{
-- if (od->nbr) {
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
-- }
--}
+- const char *mtu_s = smap_get(
+- &op->nbrp->ipv6_ra_configs, "mtu");
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
@@ -14819,27 +16400,8 @@ index 5a3227568..e78a71728 100644
+ break;
+ }
--/* Logical router ingress table IP_ROUTING : IP Routing.
-- *
-- * A packet that arrives at this table is an IP packet that should be
-- * routed to the address in 'ip[46].dst'.
-- *
-- * For regular routes without ECMP, table IP_ROUTING sets outport to the
-- * correct output port, eth.src to the output port's MAC address, and
-- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
-- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
-- * advances to the next table.
-- *
-- * For ECMP routes, i.e. multiple routes with same policy and prefix, table
-- * IP_ROUTING remembers ECMP group id and selects a member id, and advances
-- * to table IP_ROUTING_ECMP, which sets outport, eth.src and
-- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
-- */
--static void
--build_ip_routing_flows_for_lrouter_port(
-- struct ovn_port *op, struct hmap *lflows)
--{
-- if (op->nbrp) {
+- /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
+- uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
+ if (found_vip_network) {
+ break;
+ }
@@ -14848,56 +16410,33 @@ index 5a3227568..e78a71728 100644
+ } else if (lsp_is_router(op->nbsp)) {
+ /* This is a logical switch port that connects to a router. */
-- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-- add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
-- op->lrp_networks.ipv4_addrs[i].network_s,
-- op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
-- &op->nbrp->header_);
+- ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
+- "addr_mode = \"%s\", slla = %s",
+- address_mode, op->lrp_networks.ea_s);
+- if (mtu > 0) {
+- ds_put_format(actions, ", mtu = %u", mtu);
+- }
+ /* The peer of this switch port is the router port for which
+ * we need to add logical flows such that it can resolve
+ * ARP entries for all the other router ports connected to
+ * the switch in question. */
-+
-+ const char *peer_name = smap_get(&op->nbsp->options,
-+ "router-port");
-+ if (!peer_name) {
-+ return;
- }
-
-- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-- add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
-- op->lrp_networks.ipv6_addrs[i].network_s,
-- op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
-- &op->nbrp->header_);
-+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ struct ovn_port *peer = ovn_port_get_peer(ports, op);
+ if (!peer || !peer->nbrp) {
+ return;
- }
-- }
--}
++ }
--static void
--build_static_route_flows_for_lrouter(
-- struct ovn_datapath *od, struct hmap *lflows,
-- struct hmap *ports)
--{
-- if (od->nbr) {
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
-- REG_ECMP_GROUP_ID" == 0", "next;");
+- const char *prf = smap_get_def(
+- &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
+- if (strcmp(prf, "MEDIUM")) {
+- ds_put_format(actions, ", router_preference = \"%s\"", prf);
+- }
+ if (peer->od->nbr &&
+ smap_get_bool(&peer->od->nbr->options,
+ "dynamic_neigh_routers", false)) {
+ return;
+ }
-- struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
-- struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
-- struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
-- struct ecmp_groups_node *group;
-- for (int i = 0; i < od->nbr->n_static_routes; i++) {
-- struct parsed_route *route =
-- parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
-- if (!route) {
+- bool add_rs_response_flow = false;
+ for (size_t i = 0; i < op->od->n_router_ports; i++) {
+ const char *router_port_name = smap_get(
+ &op->od->router_ports[i]->nbsp->options,
@@ -14905,52 +16444,21 @@ index 5a3227568..e78a71728 100644
+ struct ovn_port *router_port = ovn_port_find(ports,
+ router_port_name);
+ if (!router_port || !router_port->nbrp) {
- continue;
- }
-- group = ecmp_groups_find(&ecmp_groups, route);
-- if (group) {
-- ecmp_groups_add_route(group, route);
-- } else {
-- const struct parsed_route *existed_route =
-- unique_routes_remove(&unique_routes, route);
-- if (existed_route) {
-- group = ecmp_groups_add(&ecmp_groups, existed_route);
-- if (group) {
-- ecmp_groups_add_route(group, route);
-- }
-- } else {
-- unique_routes_add(&unique_routes, route);
-- }
-+
++ continue;
++ }
+
+- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+- continue;
+- }
+ /* Skip the router port under consideration. */
+ if (router_port == peer) {
+ continue;
- }
-- }
-- HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
-- /* add a flow in IP_ROUTING, and one flow for each member in
-- * IP_ROUTING_ECMP. */
-- build_ecmp_route_flow(lflows, od, ports, group);
-- }
-- const struct unique_routes_node *ur;
-- HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
-- build_static_route_flow(lflows, od, ports, ur->route);
-- }
-- ecmp_groups_destroy(&ecmp_groups);
-- unique_routes_destroy(&unique_routes);
-- parsed_routes_destroy(&parsed_routes);
-- }
--}
++ }
--/* IP Multicast lookup. Here we set the output port, adjust TTL and
-- * advance to next table (priority 500).
-- */
--static void
--build_mcast_lookup_flows_for_lrouter(
-- struct ovn_datapath *od, struct hmap *lflows,
-- struct ds *match, struct ds *actions)
--{
-- if (od->nbr) {
+- ds_put_format(actions, ", prefix = %s/%u",
+- op->lrp_networks.ipv6_addrs[i].network_s,
+- op->lrp_networks.ipv6_addrs[i].plen);
+ if (router_port->lrp_networks.n_ipv4_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
@@ -14958,14 +16466,8 @@ index 5a3227568..e78a71728 100644
+ peer->json_key);
+ op_put_v4_networks(match, router_port, false);
-- /* Drop IPv6 multicast traffic that shouldn't be forwarded,
-- * i.e., router solicitation and router advertisement.
-- */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
-- "nd_rs || nd_ra", "drop;");
-- if (!od->mcast_info.rtr.relay) {
-- return;
-- }
+- add_rs_response_flow = true;
+- }
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ router_port->lrp_networks.ea_s);
@@ -14975,7 +16477,15 @@ index 5a3227568..e78a71728 100644
+ &op->nbsp->header_);
+ }
-- struct ovn_igmp_group *igmp_group;
+- if (add_rs_response_flow) {
+- ds_put_cstr(actions, "); next;");
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
+- 50, ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- ds_clear(actions);
+- ds_clear(match);
+- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
+- "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+ if (router_port->lrp_networks.n_ipv6_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
@@ -14983,23 +16493,22 @@ index 5a3227568..e78a71728 100644
+ peer->json_key);
+ op_put_v6_networks(match, router_port);
-- LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
-- ds_clear(match);
-- ds_clear(actions);
-- if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
-- ds_put_format(match, "ip4 && ip4.dst == %s ",
-- igmp_group->mcgroup.name);
-- } else {
-- ds_put_format(match, "ip6 && ip6.dst == %s ",
-- igmp_group->mcgroup.name);
-- }
-- if (od->mcast_info.rtr.flood_static) {
-- ds_put_cstr(actions,
-- "clone { "
-- "outport = \""MC_STATIC"\"; "
-- "ip.ttl--; "
-- "next; "
-- "};");
+- char ip6_str[INET6_ADDRSTRLEN + 1];
+- struct in6_addr lla;
+- in6_generate_lla(op->lrp_networks.ea, &lla);
+- memset(ip6_str, 0, sizeof(ip6_str));
+- ipv6_string_mapped(ip6_str, &lla);
+- ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
+- "ip6.dst = ip6.src; ip6.src = %s; "
+- "outport = inport; flags.loopback = 1; "
+- "output;",
+- op->lrp_networks.ea_s, ip6_str);
+- ovn_lflow_add_with_hint(lflows, op->od,
+- S_ROUTER_IN_ND_RA_RESPONSE, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- }
+-}
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ router_port->lrp_networks.ea_s);
@@ -15007,77 +16516,80 @@ index 5a3227568..e78a71728 100644
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbsp->header_);
- }
-- ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
-- igmp_group->mcgroup.name);
-- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
-- ds_cstr(match), ds_cstr(actions));
-- }
--
-- /* If needed, flood unregistered multicast on statically configured
-- * ports. Otherwise drop any multicast traffic.
-- */
-- if (od->mcast_info.rtr.flood_static) {
-- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
-- "ip4.mcast || ip6.mcast",
-- "clone { "
-- "outport = \""MC_STATIC"\"; "
-- "ip.ttl--; "
-- "next; "
-- "};");
-- } else {
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
-- "ip4.mcast || ip6.mcast", "drop;");
- }
++ }
+
+-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
+- * responder, by default goto next. (priority 0). */
+-static void
+-build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
+-{
+- if (od->nbr) {
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
++ if (smap_get(&peer->od->nbr->options, "chassis") ||
++ (peer->od->l3dgw_port && peer == peer->od->l3dgw_port)) {
++ routable_addresses_to_lflows(lflows, router_port, peer,
++ match, actions);
++ }
++ }
}
+
}
--/* Logical router ingress table POLICY: Policy.
+-/* Logical router ingress table IP_ROUTING : IP Routing.
+/* Local router ingress table CHK_PKT_LEN: Check packet length.
*
- * A packet that arrives at this table is an IP packet that should be
-- * permitted/denied/rerouted to the address in the rule's nexthop.
-- * This table sets outport to the correct out_port,
-- * eth.src to the output port's MAC address,
-- * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
-- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
-- * advances to the next table for ARP/ND resolution. */
+- * routed to the address in 'ip[46].dst'.
+ * Any IPv4 packet with outport set to the distributed gateway
+ * router port, check the packet length and store the result in the
+ * 'REGBIT_PKT_LARGER' register bit.
-+ *
+ *
+- * For regular routes without ECMP, table IP_ROUTING sets outport to the
+- * correct output port, eth.src to the output port's MAC address, and
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+- * advances to the next table.
+ * Local router ingress table LARGER_PKTS: Handle larger packets.
-+ *
+ *
+- * For ECMP routes, i.e. multiple routes with same policy and prefix, table
+- * IP_ROUTING remembers ECMP group id and selects a member id, and advances
+- * to table IP_ROUTING_ECMP, which sets outport, eth.src and
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
+- */
+ * Any IPv4 packet with outport set to the distributed gateway
+ * router port and the 'REGBIT_PKT_LARGER' register bit is set,
+ * generate ICMPv4 packet with type 3 (Destination Unreachable) and
+ * code 4 (Fragmentation needed).
+ * */
static void
--build_ingress_policy_flows_for_lrouter(
+-build_ip_routing_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows)
+build_check_pkt_len_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows,
-- struct hmap *ports)
++ struct ovn_datapath *od, struct hmap *lflows,
+ struct hmap *ports,
+ struct ds *match, struct ds *actions)
{
- if (od->nbr) {
-- /* This is a catch-all rule. It has the lowest priority (0)
-- * does a match-all("1") and pass-through (next) */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;");
+- if (op->nbrp) {
++ if (od->nbr) {
-- /* Convert routing policies to flows. */
-- for (int i = 0; i < od->nbr->n_policies; i++) {
-- const struct nbrec_logical_router_policy *rule
-- = od->nbr->policies[i];
-- build_routing_policy_flow(lflows, od, ports, rule, &rule->header_);
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
+- op->lrp_networks.ipv4_addrs[i].network_s,
+- op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
+- &op->nbrp->header_);
+- }
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
+ "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
+ "next;");
-+
+
+- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
+- op->lrp_networks.ipv6_addrs[i].network_s,
+- op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
+- &op->nbrp->header_);
+ if (od->l3dgw_port && od->l3redirect_port) {
+ int gw_mtu = 0;
+ if (od->l3dgw_port->nbrp) {
@@ -15169,7 +16681,6 @@ index 5a3227568..e78a71728 100644
}
}
--/* Local router ingress table ARP_RESOLVE: ARP Resolution. */
+/* Logical router ingress table GW_REDIRECT: Gateway redirect.
+ *
+ * For traffic with outport equal to the l3dgw_port
@@ -15178,28 +16689,46 @@ index 5a3227568..e78a71728 100644
+ * the central instance of the l3dgw_port.
+ */
static void
--build_arp_resolve_flows_for_lrouter(
-- struct ovn_datapath *od, struct hmap *lflows)
+-build_static_route_flows_for_lrouter(
+build_gateway_redirect_flows_for_lrouter(
-+ struct ovn_datapath *od, struct hmap *lflows,
+ struct ovn_datapath *od, struct hmap *lflows,
+- struct hmap *ports)
+ struct ds *match, struct ds *actions)
{
if (od->nbr) {
-- /* Multicast packets already have the outport set so just advance to
-- * next table (priority 500). */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
-- "ip4.mcast || ip6.mcast", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
+- REG_ECMP_GROUP_ID" == 0", "next;");
+ if (od->l3dgw_port && od->l3redirect_port) {
+ const struct ovsdb_idl_row *stage_hint = NULL;
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
-- "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+- struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
+- struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
+- struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
+- struct ecmp_groups_node *group;
+- for (int i = 0; i < od->nbr->n_static_routes; i++) {
+- struct parsed_route *route =
+- parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
+- if (!route) {
+- continue;
+- }
+- group = ecmp_groups_find(&ecmp_groups, route);
+- if (group) {
+- ecmp_groups_add_route(group, route);
+- } else {
+- const struct parsed_route *existed_route =
+- unique_routes_remove(&unique_routes, route);
+- if (existed_route) {
+- group = ecmp_groups_add(&ecmp_groups, existed_route);
+- if (group) {
+- ecmp_groups_add_route(group, route);
+- }
+- } else {
+- unique_routes_add(&unique_routes, route);
+- }
+ if (od->l3dgw_port->nbrp) {
+ stage_hint = &od->l3dgw_port->nbrp->header_;
-+ }
-
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
-- "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
+ }
++
+ /* For traffic with outport == l3dgw_port, if the
+ * packet did not match any higher priority redirect
+ * rule, then the traffic is redirected to the central
@@ -15213,57 +16742,53 @@ index 5a3227568..e78a71728 100644
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
+ ds_cstr(match), ds_cstr(actions),
+ stage_hint);
-+ }
+ }
+- HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
+- /* add a flow in IP_ROUTING, and one flow for each member in
+- * IP_ROUTING_ECMP. */
+- build_ecmp_route_flow(lflows, od, ports, group);
+- }
+- const struct unique_routes_node *ur;
+- HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
+- build_static_route_flow(lflows, od, ports, ur->route);
+- }
+- ecmp_groups_destroy(&ecmp_groups);
+- unique_routes_destroy(&unique_routes);
+- parsed_routes_destroy(&parsed_routes);
+
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
}
}
--/* Local router ingress table ARP_RESOLVE: ARP Resolution.
-+/* Local router ingress table ARP_REQUEST: ARP request.
- *
-- * Any unicast packet that reaches this table is an IP packet whose
-- * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
-- * (ip4.dst/ipv6.dst is the final destination).
-- * This table resolves the IP address in
-- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
-- * an Ethernet address in eth.dst.
+-/* IP Multicast lookup. Here we set the output port, adjust TTL and
+- * advance to next table (priority 500).
- */
++/* Local router ingress table ARP_REQUEST: ARP request.
++ *
+ * In the common case where the Ethernet destination has been resolved,
+ * this table outputs the packet (priority 0). Otherwise, it composes
+ * and sends an ARP/IPv6 NA request (priority 100). */
static void
--build_arp_resolve_flows_for_lrouter_port(
-- struct ovn_port *op, struct hmap *lflows,
-- struct hmap *ports,
+-build_mcast_lookup_flows_for_lrouter(
+build_arp_request_flows_for_lrouter(
-+ struct ovn_datapath *od, struct hmap *lflows,
+ struct ovn_datapath *od, struct hmap *lflows,
struct ds *match, struct ds *actions)
{
-- if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
-- return;
-- }
-+ if (od->nbr) {
+ if (od->nbr) {
+ for (int i = 0; i < od->nbr->n_static_routes; i++) {
+ const struct nbrec_logical_router_static_route *route;
-- if (op->nbrp) {
-- /* This is a logical router port. If next-hop IP address in
-- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
-- * router port, then the packet is intended to eventually be sent
-- * to this logical port. Set the destination mac address using
-- * this port's mac address.
-- *
-- * The packet is still in peer's logical pipeline. So the match
-- * should be on peer's outport. */
-- if (op->peer && op->nbrp->peer) {
-- if (op->lrp_networks.n_ipv4_addrs) {
-- ds_clear(match);
-- ds_put_format(match, "outport == %s && "
-- REG_NEXT_HOP_IPV4 "== ",
-- op->peer->json_key);
-- op_put_v4_networks(match, op, false);
+- /* Drop IPv6 multicast traffic that shouldn't be forwarded,
+- * i.e., router solicitation and router advertisement.
+- */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
+- "nd_rs || nd_ra", "drop;");
+- if (!od->mcast_info.rtr.relay) {
+- return;
+- }
+-
+- struct ovn_igmp_group *igmp_group;
+ route = od->nbr->static_routes[i];
+ struct in6_addr gw_ip6;
+ unsigned int plen;
@@ -15273,14 +16798,8 @@ index 5a3227568..e78a71728 100644
+ continue;
+ }
-- ds_clear(actions);
-- ds_put_format(actions, "eth.dst = %s; next;",
-- op->lrp_networks.ea_s);
-- ovn_lflow_add_with_hint(lflows, op->peer->od,
-- S_ROUTER_IN_ARP_RESOLVE, 100,
-- ds_cstr(match), ds_cstr(actions),
-- &op->nbrp->header_);
-+ ds_clear(match);
+- LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
+ ds_clear(match);
+ ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
+ "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
+ route->nexthop);
@@ -15292,7 +16811,27 @@ index 5a3227568..e78a71728 100644
+ char sn_addr_s[INET6_ADDRSTRLEN + 1];
+ ipv6_string_mapped(sn_addr_s, &sn_addr);
+
-+ ds_clear(actions);
+ ds_clear(actions);
+- if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
+- ds_put_format(match, "ip4 && ip4.dst == %s ",
+- igmp_group->mcgroup.name);
+- } else {
+- ds_put_format(match, "ip6 && ip6.dst == %s ",
+- igmp_group->mcgroup.name);
+- }
+- if (od->mcast_info.rtr.flood_static) {
+- ds_put_cstr(actions,
+- "clone { "
+- "outport = \""MC_STATIC"\"; "
+- "ip.ttl--; "
+- "next; "
+- "};");
+- }
+- ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
+- igmp_group->mcgroup.name);
+- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+- ds_cstr(match), ds_cstr(actions));
+- }
+ ds_put_format(actions,
+ "nd_ns { "
+ "eth.dst = "ETH_ADDR_FMT"; "
@@ -15301,11 +16840,25 @@ index 5a3227568..e78a71728 100644
+ "output; "
+ "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
+ route->nexthop);
-+
+
+- /* If needed, flood unregistered multicast on statically configured
+- * ports. Otherwise drop any multicast traffic.
+- */
+- if (od->mcast_info.rtr.flood_static) {
+- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+- "ip4.mcast || ip6.mcast",
+- "clone { "
+- "outport = \""MC_STATIC"\"; "
+- "ip.ttl--; "
+- "next; "
+- "};");
+- } else {
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+- "ip4.mcast || ip6.mcast", "drop;");
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
+ ds_cstr(match), ds_cstr(actions),
+ &route->header_);
-+ }
+ }
+
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+ "eth.dst == 00:00:00:00:00:00 && ip4",
@@ -15323,28 +16876,48 @@ index 5a3227568..e78a71728 100644
+ "output; "
+ "};");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
-+ }
-+}
-+
+ }
+ }
+
+-/* Logical router ingress table POLICY: Policy.
+/* Logical router egress table DELIVERY: Delivery (priority 100-110).
-+ *
+ *
+- * A packet that arrives at this table is an IP packet that should be
+- * permitted/denied/rerouted to the address in the rule's nexthop.
+- * This table sets outport to the correct out_port,
+- * eth.src to the output port's MAC address,
+- * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+- * advances to the next table for ARP/ND resolution. */
+ * Priority 100 rules deliver packets to enabled logical ports.
+ * Priority 110 rules match multicast packets and update the source
+ * mac before delivering to enabled logical ports. IP multicast traffic
+ * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
+ */
-+static void
+ static void
+-build_ingress_policy_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct hmap *ports)
+build_egress_delivery_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
-+{
+ {
+- if (od->nbr) {
+- /* This is a catch-all rule. It has the lowest priority (0)
+- * does a match-all("1") and pass-through (next) */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;");
+ if (op->nbrp) {
+ if (!lrport_is_enabled(op->nbrp)) {
+ /* Drop packets to disabled logical ports (since logical flow
+ * tables are default-drop). */
+ return;
+ }
-+
+
+- /* Convert routing policies to flows. */
+- for (int i = 0; i < od->nbr->n_policies; i++) {
+- const struct nbrec_logical_router_policy *rule
+- = od->nbr->policies[i];
+- build_routing_policy_flow(lflows, od, ports, rule, &rule->header_);
+ if (op->derived) {
+ /* No egress packets should be processed in the context of
+ * a chassisredirect port. The chassisredirect port should
@@ -15365,21 +16938,27 @@ index 5a3227568..e78a71728 100644
+ op->lrp_networks.ea_s);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
+ ds_cstr(match), ds_cstr(actions));
-+ }
+ }
+
+ ds_clear(match);
+ ds_put_format(match, "outport == %s", op->json_key);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
+ ds_cstr(match), "output;");
-+ }
-+
-+}
+ }
+
-+static void
+ }
+
+-/* Local router ingress table ARP_RESOLVE: ARP Resolution. */
+ static void
+-build_arp_resolve_flows_for_lrouter(
+build_misc_local_traffic_drop_flows_for_lrouter(
-+ struct ovn_datapath *od, struct hmap *lflows)
-+{
-+ if (od->nbr) {
+ struct ovn_datapath *od, struct hmap *lflows)
+ {
+ if (od->nbr) {
+- /* Multicast packets already have the outport set so just advance to
+- * next table (priority 500). */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
+- "ip4.mcast || ip6.mcast", "next;");
+ /* L3 admission control: drop multicast and broadcast source, localhost
+ * source or destination, and zero network source or destination
+ * (priority 100). */
@@ -15391,7 +16970,9 @@ index 5a3227568..e78a71728 100644
+ "ip4.src == 0.0.0.0/8 || "
+ "ip4.dst == 0.0.0.0/8",
+ "drop;");
-+
+
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
+- "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+ /* Drop ARP packets (priority 85). ARP request packets for router's own
+ * IPs are handled with priority-90 flows.
+ * Drop IPv6 ND packets (priority 85). ND NA packets for router's own
@@ -15399,7 +16980,9 @@ index 5a3227568..e78a71728 100644
+ */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
+ "arp || nd", "drop;");
-+
+
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
+- "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
+ /* Allow IPv6 multicast traffic that's supposed to reach the
+ * router pipeline (e.g., router solicitations).
+ */
@@ -15427,14 +17010,28 @@ index 5a3227568..e78a71728 100644
+ /* Pass other traffic not already handled to the next table for
+ * routing. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
-+ }
-+}
-+
-+static void
+ }
+ }
+
+-/* Local router ingress table ARP_RESOLVE: ARP Resolution.
+- *
+- * Any unicast packet that reaches this table is an IP packet whose
+- * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
+- * (ip4.dst/ipv6.dst is the final destination).
+- * This table resolves the IP address in
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
+- * an Ethernet address in eth.dst.
+- */
+ static void
+-build_arp_resolve_flows_for_lrouter_port(
+build_dhcpv6_reply_flows_for_lrouter_port(
-+ struct ovn_port *op, struct hmap *lflows,
+ struct ovn_port *op, struct hmap *lflows,
+- struct hmap *ports,
+- struct ds *match, struct ds *actions)
+ struct ds *match)
-+{
+ {
+- if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
+- return;
+ if (op->nbrp && (!op->derived)) {
+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ ds_clear(match);
@@ -15445,10 +17042,34 @@ index 5a3227568..e78a71728 100644
+ ds_cstr(match),
+ "reg0 = 0; handle_dhcpv6_reply;");
+ }
-+ }
-+
+ }
+
+- if (op->nbrp) {
+- /* This is a logical router port. If next-hop IP address in
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
+- * router port, then the packet is intended to eventually be sent
+- * to this logical port. Set the destination mac address using
+- * this port's mac address.
+- *
+- * The packet is still in peer's logical pipeline. So the match
+- * should be on peer's outport. */
+- if (op->peer && op->nbrp->peer) {
+- if (op->lrp_networks.n_ipv4_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 "== ",
+- op->peer->json_key);
+- op_put_v4_networks(match, op, false);
+}
-+
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- op->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, op->peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- }
+static void
+build_ipv6_input_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
@@ -15464,7 +17085,13 @@ index 5a3227568..e78a71728 100644
+ ds_put_cstr(match, "ip6.dst == ");
+ op_put_v6_networks(match, op);
+ ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
-+
+
+- if (op->lrp_networks.n_ipv6_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV6 " == ",
+- op->peer->json_key);
+- op_put_v6_networks(match, op);
+ const char *lrp_actions =
+ "ip6.dst <-> ip6.src; "
+ "ip.ttl = 255; "
@@ -15475,7 +17102,14 @@ index 5a3227568..e78a71728 100644
+ ds_cstr(match), lrp_actions,
+ &op->nbrp->header_);
+ }
-+
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- op->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, op->peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+ /* ND reply. These flows reply to ND solicitations for the
+ * router's own IP address. */
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
@@ -15490,24 +17124,39 @@ index 5a3227568..e78a71728 100644
+ ds_put_format(match, "is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
-
-- if (op->lrp_networks.n_ipv6_addrs) {
++
+ build_lrouter_nd_flow(op->od, op, "nd_na_router",
+ op->lrp_networks.ipv6_addrs[i].addr_s,
+ op->lrp_networks.ipv6_addrs[i].sn_addr_s,
+ REG_INPORT_ETH_ADDR, match, false, 90,
+ &op->nbrp->header_, lflows);
-+ }
-+
+ }
+
+- if (!op->derived && op->od->l3redirect_port) {
+- const char *redirect_type = smap_get(&op->nbrp->options,
+- "redirect-type");
+- if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
+- /* Packet is on a non gateway chassis and
+- * has an unresolved ARP on a network behind gateway
+- * chassis attached router port. Since, redirect type
+- * is "bridged", instead of calling "get_arp"
+- * on this node, we will redirect the packet to gateway
+- * chassis, by setting destination mac router port mac.*/
+ /* UDP/TCP/SCTP port unreachable */
+ if (!smap_get(&op->od->nbr->options, "chassis")
+ && !op->od->l3dgw_port) {
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
ds_clear(match);
- ds_put_format(match, "outport == %s && "
-- REG_NEXT_HOP_IPV6 " == ",
-- op->peer->json_key);
-- op_put_v6_networks(match, op);
+- "!is_chassis_resident(%s)", op->json_key,
+- op->od->l3redirect_port->json_key);
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- op->lrp_networks.ea_s);
+-
+- ovn_lflow_add_with_hint(lflows, op->od,
+- S_ROUTER_IN_ARP_RESOLVE, 50,
+- ds_cstr(match), ds_cstr(actions),
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
@@ -15517,14 +17166,25 @@ index 5a3227568..e78a71728 100644
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 80, ds_cstr(match), action,
-+ &op->nbrp->header_);
+ &op->nbrp->header_);
+- }
+- }
-- ds_clear(actions);
-- ds_put_format(actions, "eth.dst = %s; next;",
-- op->lrp_networks.ea_s);
-- ovn_lflow_add_with_hint(lflows, op->peer->od,
-- S_ROUTER_IN_ARP_RESOLVE, 100,
-- ds_cstr(match), ds_cstr(actions),
+- /* Drop IP traffic destined to router owned IPs. Part of it is dropped
+- * in stage "lr_in_ip_input" but traffic that could have been unSNATed
+- * but didn't match any existing session might still end up here.
+- *
+- * Priority 1.
+- */
+- build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
+- lflows);
+- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+- && strcmp(op->nbsp->type, "virtual")) {
+- /* This is a logical switch port that backs a VM or a container.
+- * Extract its addresses. For each of the address, go through all
+- * the router ports attached to the switch (to which this port
+- * connects) and if the address in question is reachable from the
+- * router port, add an ARP/ND entry in that router's pipeline. */
+ ds_clear(match);
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag && sctp",
@@ -15535,27 +17195,23 @@ index 5a3227568..e78a71728 100644
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 80, ds_cstr(match), action,
- &op->nbrp->header_);
-- }
-- }
++ &op->nbrp->header_);
-- if (!op->derived && op->od->l3redirect_port) {
-- const char *redirect_type = smap_get(&op->nbrp->options,
-- "redirect-type");
-- if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
-- /* Packet is on a non gateway chassis and
-- * has an unresolved ARP on a network behind gateway
-- * chassis attached router port. Since, redirect type
-- * is "bridged", instead of calling "get_arp"
-- * on this node, we will redirect the packet to gateway
-- * chassis, by setting destination mac router port mac.*/
- ds_clear(match);
-- ds_put_format(match, "outport == %s && "
-- "!is_chassis_resident(%s)", op->json_key,
-- op->od->l3redirect_port->json_key);
-- ds_clear(actions);
-- ds_put_format(actions, "eth.dst = %s; next;",
-- op->lrp_networks.ea_s);
+- for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+- const char *ea_s = op->lsp_addrs[i].ea_s;
+- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
+- const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
+- for (size_t k = 0; k < op->od->n_router_ports; k++) {
+- /* Get the Logical_Router_Port that the
+- * Logical_Switch_Port is connected to, as
+- * 'peer'. */
+- const char *peer_name = smap_get(
+- &op->od->router_ports[k]->nbsp->options,
+- "router-port");
+- if (!peer_name) {
+- continue;
+- }
++ ds_clear(match);
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag && udp",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
@@ -15570,9 +17226,10 @@ index 5a3227568..e78a71728 100644
+ 80, ds_cstr(match), action,
+ &op->nbrp->header_);
-- ovn_lflow_add_with_hint(lflows, op->od,
-- S_ROUTER_IN_ARP_RESOLVE, 50,
-- ds_cstr(match), ds_cstr(actions),
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
+- continue;
+- }
+ ds_clear(match);
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag",
@@ -15586,25 +17243,13 @@ index 5a3227568..e78a71728 100644
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 70, ds_cstr(match), action,
- &op->nbrp->header_);
- }
- }
++ &op->nbrp->header_);
++ }
++ }
-- /* Drop IP traffic destined to router owned IPs. Part of it is dropped
-- * in stage "lr_in_ip_input" but traffic that could have been unSNATed
-- * but didn't match any existing session might still end up here.
-- *
-- * Priority 1.
-- */
-- build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
-- lflows);
-- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
-- && strcmp(op->nbsp->type, "virtual")) {
-- /* This is a logical switch port that backs a VM or a container.
-- * Extract its addresses. For each of the address, go through all
-- * the router ports attached to the switch (to which this port
-- * connects) and if the address in question is reachable from the
-- * router port, add an ARP/ND entry in that router's pipeline. */
+- if (!find_lrp_member_ip(peer, ip_s)) {
+- continue;
+- }
+ /* ICMPv6 time exceeded */
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ /* skip link-local address */
@@ -15612,23 +17257,22 @@ index 5a3227568..e78a71728 100644
+ continue;
+ }
-- for (size_t i = 0; i < op->n_lsp_addrs; i++) {
-- const char *ea_s = op->lsp_addrs[i].ea_s;
-- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
-- const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
-- for (size_t k = 0; k < op->od->n_router_ports; k++) {
-- /* Get the Logical_Router_Port that the
-- * Logical_Switch_Port is connected to, as
-- * 'peer'. */
-- const char *peer_name = smap_get(
-- &op->od->router_ports[k]->nbsp->options,
-- "router-port");
-- if (!peer_name) {
-- continue;
-- }
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == %s",
+- peer->json_key, ip_s);
+ ds_clear(match);
+ ds_clear(actions);
-+
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match),
+- ds_cstr(actions),
+- &op->nbsp->header_);
+- }
+- }
+ ds_put_format(match,
+ "inport == %s && ip6 && "
+ "ip6.src == %s/%d && "
@@ -15652,13 +17296,22 @@ index 5a3227568..e78a71728 100644
+ }
+ }
-- struct ovn_port *peer = ovn_port_find(ports, peer_name);
-- if (!peer || !peer->nbrp) {
+- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+- const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
+- for (size_t k = 0; k < op->od->n_router_ports; k++) {
+- /* Get the Logical_Router_Port that the
+- * Logical_Switch_Port is connected to, as
+- * 'peer'. */
+- const char *peer_name = smap_get(
+- &op->od->router_ports[k]->nbsp->options,
+- "router-port");
+- if (!peer_name) {
- continue;
- }
+}
-- if (!find_lrp_member_ip(peer, ip_s)) {
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
- continue;
- }
+static void
@@ -15667,10 +17320,9 @@ index 5a3227568..e78a71728 100644
+{
+ if (od->nbr) {
-- ds_clear(match);
-- ds_put_format(match, "outport == %s && "
-- REG_NEXT_HOP_IPV4 " == %s",
-- peer->json_key, ip_s);
+- if (!find_lrp_member_ip(peer, ip_s)) {
+- continue;
+- }
+ /* Priority-90-92 flows handle ARP requests and ND packets. Most are
+ * per logical port but DNAT addresses can be handled per datapath
+ * for non gateway router ports.
@@ -15682,57 +17334,14 @@ index 5a3227568..e78a71728 100644
+ for (int i = 0; i < od->nbr->n_nat; i++) {
+ struct ovn_nat *nat_entry = &od->nat_entries[i];
-- ds_clear(actions);
-- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
-- ovn_lflow_add_with_hint(lflows, peer->od,
-- S_ROUTER_IN_ARP_RESOLVE, 100,
-- ds_cstr(match),
-- ds_cstr(actions),
-- &op->nbsp->header_);
-- }
-+ /* Skip entries we failed to parse. */
-+ if (!nat_entry_is_valid(nat_entry)) {
-+ continue;
- }
-
-- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
-- const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
-- for (size_t k = 0; k < op->od->n_router_ports; k++) {
-- /* Get the Logical_Router_Port that the
-- * Logical_Switch_Port is connected to, as
-- * 'peer'. */
-- const char *peer_name = smap_get(
-- &op->od->router_ports[k]->nbsp->options,
-- "router-port");
-- if (!peer_name) {
-- continue;
-- }
--
-- struct ovn_port *peer = ovn_port_find(ports, peer_name);
-- if (!peer || !peer->nbrp) {
-- continue;
-- }
--
-- if (!find_lrp_member_ip(peer, ip_s)) {
-- continue;
-- }
-+ /* Skip SNAT entries for now, we handle unique SNAT IPs separately
-+ * below.
-+ */
-+ if (!strcmp(nat_entry->nb->type, "snat")) {
-+ continue;
-+ }
-+ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
-+ }
-
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV6 " == %s",
- peer->json_key, ip_s);
-+ /* Now handle SNAT entries too, one per unique SNAT IP. */
-+ struct shash_node *snat_snode;
-+ SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
-+ struct ovn_snat_ip *snat_ip = snat_snode->data;
++ /* Skip entries we failed to parse. */
++ if (!nat_entry_is_valid(nat_entry)) {
++ continue;
++ }
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
@@ -15742,10 +17351,14 @@ index 5a3227568..e78a71728 100644
- ds_cstr(actions),
- &op->nbsp->header_);
- }
-+ if (ovs_list_is_empty(&snat_ip->snat_entries)) {
++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately
++ * below.
++ */
++ if (!strcmp(nat_entry->nb->type, "snat")) {
+ continue;
}
-- }
++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+ }
- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
- && !strcmp(op->nbsp->type, "virtual")) {
- /* This is a virtual port. Add ARP replies for the virtual ip with
@@ -15764,13 +17377,11 @@ index 5a3227568..e78a71728 100644
- if (!vip || !virtual_parents ||
- !ip_parse(vip, &ip) || !op->sb) {
- return;
-+ struct ovn_nat *nat_entry =
-+ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
-+ struct ovn_nat, ext_addr_list_node);
-+ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
- }
-+ }
-+}
+- }
++ /* Now handle SNAT entries too, one per unique SNAT IP. */
++ struct shash_node *snat_snode;
++ SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
++ struct ovn_snat_ip *snat_ip = snat_snode->data;
- if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
- !op->sb->chassis) {
@@ -15782,11 +17393,27 @@ index 5a3227568..e78a71728 100644
- if (!peer_name) {
- continue;
- }
--
++ if (ovs_list_is_empty(&snat_ip->snat_entries)) {
++ continue;
++ }
+
- struct ovn_port *peer = ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- continue;
- }
++ struct ovn_nat *nat_entry =
++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
++ struct ovn_nat, ext_addr_list_node);
++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
++ }
++ }
++}
+
+- if (find_lrp_member_ip(peer, vip)) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == %s",
+- peer->json_key, vip);
+/* Logical router ingress table 3: IP Input for IPv4. */
+static void
+build_lrouter_ipv4_ip_input(struct ovn_port *op,
@@ -15808,21 +17435,6 @@ index 5a3227568..e78a71728 100644
+ ds_cstr(match), "drop;",
+ &op->nbrp->header_);
-- if (find_lrp_member_ip(peer, vip)) {
-- ds_clear(match);
-- ds_put_format(match, "outport == %s && "
-- REG_NEXT_HOP_IPV4 " == %s",
-- peer->json_key, vip);
-+ /* ICMP echo reply. These flows reply to ICMP echo requests
-+ * received for the router's IP address. Since packets only
-+ * get here as part of the logical router datapath, the inport
-+ * (i.e. the incoming locally attached net) does not matter.
-+ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
-+ ds_clear(match);
-+ ds_put_cstr(match, "ip4.dst == ");
-+ op_put_v4_networks(match, op, false);
-+ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
-
- const char *arp_actions =
- "eth.dst = 00:00:00:00:00:00; next;";
- ovn_lflow_add_with_hint(lflows, peer->od,
@@ -15839,15 +17451,15 @@ index 5a3227568..e78a71728 100644
- if (!vp || !vp->nbsp) {
- return;
- }
-+ const char * icmp_actions = "ip4.dst <-> ip4.src; "
-+ "ip.ttl = 255; "
-+ "icmp4.type = 0; "
-+ "flags.loopback = 1; "
-+ "next; ";
-+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
-+ ds_cstr(match), icmp_actions,
-+ &op->nbrp->header_);
-+ }
++ /* ICMP echo reply. These flows reply to ICMP echo requests
++ * received for the router's IP address. Since packets only
++ * get here as part of the logical router datapath, the inport
++ * (i.e. the incoming locally attached net) does not matter.
++ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
++ ds_clear(match);
++ ds_put_cstr(match, "ip4.dst == ");
++ op_put_v4_networks(match, op, false);
++ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
- for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
- bool found_vip_network = false;
@@ -15862,22 +17474,36 @@ index 5a3227568..e78a71728 100644
- if (!peer_name) {
- continue;
- }
-+ /* BFD msg handling */
-+ build_lrouter_bfd_flows(lflows, op);
++ const char * icmp_actions = "ip4.dst <-> ip4.src; "
++ "ip.ttl = 255; "
++ "icmp4.type = 0; "
++ "flags.loopback = 1; "
++ "next; ";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
++ ds_cstr(match), icmp_actions,
++ &op->nbrp->header_);
++ }
- struct ovn_port *peer =
- ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- continue;
- }
++ /* BFD msg handling */
++ build_lrouter_bfd_flows(lflows, op);
+
+- if (!find_lrp_member_ip(peer, vip)) {
+- continue;
+- }
+ /* ICMP time exceeded */
+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+ ds_clear(match);
+ ds_clear(actions);
-- if (!find_lrp_member_ip(peer, vip)) {
-- continue;
-- }
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == %s",
+- peer->json_key, vip);
+ ds_put_format(match,
+ "inport == %s && ip4 && "
+ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
@@ -15896,18 +17522,6 @@ index 5a3227568..e78a71728 100644
+ &op->nbrp->header_);
+ }
-- ds_clear(match);
-- ds_put_format(match, "outport == %s && "
-- REG_NEXT_HOP_IPV4 " == %s",
-- peer->json_key, vip);
-+ /* ARP reply. These flows reply to ARP requests for the router's own
-+ * IP address. */
-+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-+ ds_clear(match);
-+ ds_put_format(match, "arp.spa == %s/%u",
-+ op->lrp_networks.ipv4_addrs[i].network_s,
-+ op->lrp_networks.ipv4_addrs[i].plen);
-
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
- ovn_lflow_add_with_hint(lflows, peer->od,
@@ -15917,6 +17531,14 @@ index 5a3227568..e78a71728 100644
- &op->nbsp->header_);
- found_vip_network = true;
- break;
++ /* ARP reply. These flows reply to ARP requests for the router's own
++ * IP address. */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ ds_clear(match);
++ ds_put_format(match, "arp.spa == %s/%u",
++ op->lrp_networks.ipv4_addrs[i].network_s,
++ op->lrp_networks.ipv4_addrs[i].plen);
++
+ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
+ && op->peer->od->n_localnet_ports) {
+ bool add_chassis_resident_check = false;
@@ -15961,39 +17583,34 @@ index 5a3227568..e78a71728 100644
- * we need to add logical flows such that it can resolve
- * ARP entries for all the other router ports connected to
- * the switch in question. */
-+ /* A set to hold all load-balancer vips that need ARP responses. */
-+ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
-+ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
-+ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
-
-- const char *peer_name = smap_get(&op->nbsp->options,
-- "router-port");
-- if (!peer_name) {
-- return;
-- }
+ const char *ip_address;
-+ if (sset_count(&all_ips_v4)) {
++ if (sset_count(&op->od->lb_ips_v4)) {
+ ds_clear(match);
+ if (op == op->od->l3dgw_port) {
+ ds_put_format(match, "is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
+ }
+- const char *peer_name = smap_get(&op->nbsp->options,
+- "router-port");
+- if (!peer_name) {
+- return;
+- }
++ struct ds load_balancer_ips_v4 = DS_EMPTY_INITIALIZER;
+
- struct ovn_port *peer = ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- return;
- }
-+ struct ds load_balancer_ips_v4 = DS_EMPTY_INITIALIZER;
++ /* For IPv4 we can just create one rule with all required IPs. */
++ ds_put_cstr(&load_balancer_ips_v4, "{ ");
++ ds_put_and_free_cstr(&load_balancer_ips_v4,
++ sset_join(&op->od->lb_ips_v4, ", ", " }"));
- if (peer->od->nbr &&
- smap_get_bool(&peer->od->nbr->options,
- "dynamic_neigh_routers", false)) {
- return;
-+ /* For IPv4 we can just create one rule with all required IPs. */
-+ ds_put_cstr(&load_balancer_ips_v4, "{ ");
-+ ds_put_and_free_cstr(&load_balancer_ips_v4,
-+ sset_join(&all_ips_v4, ", ", " }"));
-+
+ build_lrouter_arp_flow(op->od, op, ds_cstr(&load_balancer_ips_v4),
+ REG_INPORT_ETH_ADDR,
+ match, false, 90, NULL, lflows);
@@ -16008,7 +17625,7 @@ index 5a3227568..e78a71728 100644
- router_port_name);
- if (!router_port || !router_port->nbrp) {
- continue;
-+ SSET_FOR_EACH (ip_address, &all_ips_v6) {
++ SSET_FOR_EACH (ip_address, &op->od->lb_ips_v6) {
+ ds_clear(match);
+ if (op == op->od->l3dgw_port) {
+ ds_put_format(match, "is_chassis_resident(%s)",
@@ -16025,9 +17642,6 @@ index 5a3227568..e78a71728 100644
+ }
- if (router_port->lrp_networks.n_ipv4_addrs) {
-+ sset_destroy(&all_ips_v4);
-+ sset_destroy(&all_ips_v6);
-+
+ if (!smap_get(&op->od->nbr->options, "chassis")
+ && !op->od->l3dgw_port) {
+ /* UDP/TCP/SCTP port unreachable. */
@@ -16669,7 +18283,11 @@ index 5a3227568..e78a71728 100644
+ is_v6 ? "6" : "4", nat->external_ip);
+ } else {
+ ds_put_format(actions, "ct_snat(%s", nat->external_ip);
-+
+
+- /* Allow other multicast if relay enabled (priority 82). */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
+- "ip4.mcast || ip6.mcast",
+- od->mcast_info.rtr.relay ? "next;" : "drop;");
+ if (nat->external_port_range[0]) {
+ ds_put_format(actions, ",%s",
+ nat->external_port_range);
@@ -16700,27 +18318,26 @@ index 5a3227568..e78a71728 100644
+ }
+ ds_clear(actions);
-- /* Allow other multicast if relay enabled (priority 82). */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
-- "ip4.mcast || ip6.mcast",
-- od->mcast_info.rtr.relay ? "next;" : "drop;");
+- /* Drop Ethernet local broadcast. By definition this traffic should
+- * not be forwarded.*/
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
+- "eth.bcast", "drop;");
+ if (nat->allowed_ext_ips || nat->exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+ is_v6, false, mask);
+ }
-- /* Drop Ethernet local broadcast. By definition this traffic should
-- * not be forwarded.*/
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
-- "eth.bcast", "drop;");
+- /* TTL discard */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
+- "ip4 && ip.ttl == {0, 1}", "drop;");
+ if (distributed) {
+ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
+ ETH_ADDR_ARGS(mac));
+ }
-- /* TTL discard */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
-- "ip4 && ip.ttl == {0, 1}", "drop;");
+- /* Pass other traffic not already handled to the next table for
+- * routing. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.src=%s; next;",
+ is_v6 ? "6" : "4", nat->external_ip);
@@ -16732,10 +18349,7 @@ index 5a3227568..e78a71728 100644
+ }
+ ds_put_format(actions, ");");
+ }
-
-- /* Pass other traffic not already handled to the next table for
-- * routing. */
-- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
++
+ /* The priority here is calculated such that the
+ * nat->logical_ip with the longest mask gets a higher
+ * priority. */
@@ -16966,6 +18580,7 @@ index 5a3227568..e78a71728 100644
+static void
+build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od,
+ struct hmap *lflows,
++ struct hmap *ports,
+ struct shash *meter_groups,
+ struct hmap *lbs,
+ struct ds *match, struct ds *actions)
@@ -17101,10 +18716,21 @@ index 5a3227568..e78a71728 100644
- op->json_key,
- op->lrp_networks.ipv6_addrs[i].network_s,
- op->lrp_networks.ipv6_addrs[i].plen);
-+ "ip%s.src == %s && outport == %s && "
-+ "is_chassis_resident(\"%s\")",
++ "ip%s.src == %s && outport == %s",
+ is_v6 ? "6" : "4", nat->logical_ip,
-+ od->l3dgw_port->json_key, nat->logical_port);
++ od->l3dgw_port->json_key);
++ /* Add a rule to drop traffic from a distributed NAT if
++ * the virtual port has not claimed yet because otherwise
++ * the traffic will be centralized misconfiguring the TOR switch.
++ */
++ struct ovn_port *op = ovn_port_find(ports, nat->logical_port);
++ if (op && op->nbsp && !strcmp(op->nbsp->type, "virtual")) {
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
++ 80, ds_cstr(match), "drop;",
++ &nat->header_);
++ }
++ ds_put_format(match, " && is_chassis_resident(\"%s\")",
++ nat->logical_port);
+ ds_put_format(actions, "eth.src = %s; %s = %s; next;",
+ nat->external_mac,
+ is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
@@ -17137,7 +18763,9 @@ index 5a3227568..e78a71728 100644
+ ds_clear(actions);
ds_put_format(actions,
- "icmp6 {"
-- "eth.dst <-> eth.src; "
++ "clone { ct_clear; "
++ "inport = outport; outport = \"\"; "
+ "eth.dst <-> eth.src; "
- "ip6.dst = ip6.src; "
- "ip6.src = %s; "
- "ip.ttl = 255; "
@@ -17146,8 +18774,6 @@ index 5a3227568..e78a71728 100644
- "next; };",
- op->lrp_networks.ipv6_addrs[i].addr_s);
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
-+ "clone { ct_clear; "
-+ "inport = outport; outport = \"\"; "
+ "flags = 0; flags.loopback = 1; ");
+ for (int j = 0; j < MFF_N_LOG_REGS; j++) {
+ ds_put_format(actions, "reg%d = 0; ", j);
@@ -17218,7 +18844,7 @@ index 5a3227568..e78a71728 100644
struct lswitch_flow_build_info {
struct hmap *datapaths;
struct hmap *ports;
-@@ -11177,7 +12002,8 @@ struct lswitch_flow_build_info {
+@@ -11177,7 +12220,8 @@ struct lswitch_flow_build_info {
static void
build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
@@ -17228,7 +18854,7 @@ index 5a3227568..e78a71728 100644
{
/* Build Logical Switch Flows. */
build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups, lsi->lflows,
-@@ -11186,13 +12012,20 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+@@ -11186,13 +12230,20 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
build_fwd_group_lflows(od, lsi->lflows);
build_lswitch_lflows_admission_control(od, lsi->lflows);
build_lswitch_input_port_sec_od(od, lsi->lflows);
@@ -17250,17 +18876,18 @@ index 5a3227568..e78a71728 100644
build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
&lsi->actions);
build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->ports);
-@@ -11204,6 +12037,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+@@ -11204,6 +12255,10 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
build_arp_request_flows_for_lrouter(od, lsi->lflows, &lsi->match,
&lsi->actions);
build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows);
+ build_lrouter_arp_nd_for_datapath(od, lsi->lflows);
-+ build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->meter_groups,
-+ lsi->lbs, &lsi->match, &lsi->actions);
++ build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->ports,
++ lsi->meter_groups, lsi->lbs, &lsi->match,
++ &lsi->actions);
}
/* Helper function to combine all lflow generation which is iterated by port.
-@@ -11216,6 +12052,20 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op,
+@@ -11216,13 +12271,27 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op,
/* Build Logical Switch Flows. */
build_lswitch_input_port_sec_op(op, lsi->lflows, &lsi->actions,
&lsi->match);
@@ -17281,7 +18908,15 @@ index 5a3227568..e78a71728 100644
/* Build Logical Router Flows. */
build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match,
-@@ -11232,6 +12082,10 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op,
+ &lsi->actions);
+ build_neigh_learning_flows_for_lrouter_port(op, lsi->lflows, &lsi->match,
+ &lsi->actions);
+- build_ip_routing_flows_for_lrouter_port(op, lsi->lflows);
++ build_ip_routing_flows_for_lrouter_port(op, lsi->ports, lsi->lflows);
+ build_ND_RA_flows_for_lrouter_port(op, lsi->lflows, &lsi->match,
+ &lsi->actions);
+ build_arp_resolve_flows_for_lrouter_port(op, lsi->lflows, lsi->ports,
+@@ -11232,6 +12301,10 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op,
build_dhcpv6_reply_flows_for_lrouter_port(op, lsi->lflows, &lsi->match);
build_ipv6_input_flows_for_lrouter_port(op, lsi->lflows,
&lsi->match, &lsi->actions);
@@ -17292,7 +18927,7 @@ index 5a3227568..e78a71728 100644
}
static void
-@@ -11239,10 +12093,13 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -11239,10 +12312,13 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
struct hmap *port_groups, struct hmap *lflows,
struct hmap *mcgroups,
struct hmap *igmp_groups,
@@ -17307,7 +18942,7 @@ index 5a3227568..e78a71728 100644
char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac);
-@@ -11264,22 +12121,28 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+@@ -11264,22 +12340,28 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
* will move here and will be reogranized by iterator type.
*/
HMAP_FOR_EACH (od, key_node, datapaths) {
@@ -17343,7 +18978,7 @@ index 5a3227568..e78a71728 100644
}
struct ovn_dp_group {
-@@ -11356,13 +12219,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
+@@ -11356,13 +12438,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
struct hmap *ports, struct hmap *port_groups,
struct hmap *mcgroups, struct hmap *igmp_groups,
struct shash *meter_groups,
@@ -17360,7 +18995,7 @@ index 5a3227568..e78a71728 100644
/* Collecting all unique datapath groups. */
struct hmap dp_groups = HMAP_INITIALIZER(&dp_groups);
-@@ -11801,17 +12665,20 @@ static void
+@@ -11801,17 +12884,20 @@ static void
sync_meters_iterate_nb_meter(struct northd_context *ctx,
const char *meter_name,
const struct nbrec_meter *nb_meter,
@@ -17384,7 +19019,7 @@ index 5a3227568..e78a71728 100644
if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) {
struct sbrec_meter_band **sb_bands;
-@@ -11833,6 +12700,24 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx,
+@@ -11833,6 +12919,24 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx,
sbrec_meter_set_unit(sb_meter, nb_meter->unit);
}
@@ -17409,7 +19044,7 @@ index 5a3227568..e78a71728 100644
/* Each entry in the Meter and Meter_Band tables in OVN_Northbound have
* a corresponding entries in the Meter and Meter_Band tables in
* OVN_Southbound. Additionally, ACL logs that use fair meters have
-@@ -11840,9 +12725,10 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx,
+@@ -11840,9 +12944,10 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx,
*/
static void
sync_meters(struct northd_context *ctx, struct hmap *datapaths,
@@ -17421,7 +19056,7 @@ index 5a3227568..e78a71728 100644
const struct sbrec_meter *sb_meter;
SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) {
-@@ -11852,7 +12738,7 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
+@@ -11852,7 +12957,7 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
const struct nbrec_meter *nb_meter;
NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) {
sync_meters_iterate_nb_meter(ctx, nb_meter->name, nb_meter,
@@ -17430,7 +19065,7 @@ index 5a3227568..e78a71728 100644
}
/*
-@@ -11866,19 +12752,28 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
+@@ -11866,19 +12971,28 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
continue;
}
for (size_t i = 0; i < od->nbs->n_acls; i++) {
@@ -17468,7 +19103,7 @@ index 5a3227568..e78a71728 100644
struct shash_node *node, *next;
SHASH_FOR_EACH_SAFE (node, next, &sb_meters) {
sbrec_meter_delete(node->data);
-@@ -12274,6 +13169,7 @@ ovnnb_db_run(struct northd_context *ctx,
+@@ -12274,6 +13388,7 @@ ovnnb_db_run(struct northd_context *ctx,
struct hmap igmp_groups;
struct shash meter_groups = SHASH_INITIALIZER(&meter_groups);
struct hmap lbs;
@@ -17476,7 +19111,7 @@ index 5a3227568..e78a71728 100644
/* Sync ipsec configuration.
* Copy nb_cfg from northbound to southbound database.
-@@ -12354,6 +13250,10 @@ ovnnb_db_run(struct northd_context *ctx,
+@@ -12354,28 +13469,36 @@ ovnnb_db_run(struct northd_context *ctx,
use_logical_dp_groups = smap_get_bool(&nb->options,
"use_logical_dp_groups", false);
@@ -17487,7 +19122,17 @@ index 5a3227568..e78a71728 100644
controller_event_en = smap_get_bool(&nb->options,
"controller_event", false);
check_lsp_is_up = !smap_get_bool(&nb->options,
-@@ -12368,14 +13268,16 @@ ovnnb_db_run(struct northd_context *ctx,
+ "ignore_lsp_down", false);
+
+ build_datapaths(ctx, datapaths, lr_list);
++ build_ovn_lbs(ctx, datapaths, &lbs);
++ build_lrouter_lbs(datapaths, &lbs);
+ build_ports(ctx, sbrec_chassis_by_name, datapaths, ports);
+- build_ovn_lbs(ctx, datapaths, ports, &lbs);
++ build_ovn_lb_svcs(ctx, ports, &lbs);
+ build_ipam(datapaths, ports);
+ build_port_group_lswitches(ctx, &port_groups, ports);
+ build_lrouter_groups(ports, lr_list);
build_ip_mcast(ctx, datapaths);
build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
build_meter_groups(ctx, &meter_groups);
@@ -17506,7 +19151,7 @@ index 5a3227568..e78a71728 100644
struct ovn_northd_lb *lb;
HMAP_FOR_EACH_POP (lb, hmap_node, &lbs) {
-@@ -12393,9 +13295,13 @@ ovnnb_db_run(struct northd_context *ctx,
+@@ -12393,9 +13516,13 @@ ovnnb_db_run(struct northd_context *ctx,
HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
ovn_port_group_destroy(&port_groups, pg);
}
@@ -17520,7 +19165,7 @@ index 5a3227568..e78a71728 100644
struct shash_node *node, *next;
SHASH_FOR_EACH_SAFE (node, next, &meter_groups) {
-@@ -12542,7 +13448,17 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports,
+@@ -12542,7 +13669,17 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports,
continue;
}
@@ -17539,7 +19184,7 @@ index 5a3227568..e78a71728 100644
if (!op->nbsp->up || *op->nbsp->up != up) {
nbrec_logical_switch_port_set_up(op->nbsp, &up, 1);
}
-@@ -12690,7 +13606,7 @@ static const char *rbac_encap_update[] =
+@@ -12690,7 +13827,7 @@ static const char *rbac_encap_update[] =
static const char *rbac_port_binding_auth[] =
{""};
static const char *rbac_port_binding_update[] =
@@ -17548,7 +19193,7 @@ index 5a3227568..e78a71728 100644
static const char *rbac_mac_binding_auth[] =
{""};
-@@ -13176,6 +14092,8 @@ main(int argc, char *argv[])
+@@ -13176,6 +14313,8 @@ main(int argc, char *argv[])
&sbrec_port_binding_col_ha_chassis_group);
ovsdb_idl_add_column(ovnsb_idl_loop.idl,
&sbrec_port_binding_col_virtual_parent);
@@ -17557,7 +19202,7 @@ index 5a3227568..e78a71728 100644
ovsdb_idl_add_column(ovnsb_idl_loop.idl,
&sbrec_gateway_chassis_col_chassis);
ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
-@@ -13324,9 +14242,25 @@ main(int argc, char *argv[])
+@@ -13324,9 +14463,25 @@ main(int argc, char *argv[])
add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_name);
add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_vips);
add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_protocol);
@@ -17583,7 +19228,7 @@ index 5a3227568..e78a71728 100644
struct ovsdb_idl_index *sbrec_chassis_by_name
= chassis_index_create(ovnsb_idl_loop.idl);
-@@ -13350,6 +14284,15 @@ main(int argc, char *argv[])
+@@ -13350,6 +14505,15 @@ main(int argc, char *argv[])
state.had_lock = false;
state.paused = false;
while (!exiting) {
@@ -17599,7 +19244,7 @@ index 5a3227568..e78a71728 100644
if (!state.paused) {
if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
!ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
-@@ -13421,6 +14364,7 @@ main(int argc, char *argv[])
+@@ -13421,6 +14585,7 @@ main(int argc, char *argv[])
unixctl_server_run(unixctl);
unixctl_server_wait(unixctl);
@@ -17607,7 +19252,7 @@ index 5a3227568..e78a71728 100644
if (exiting) {
poll_immediate_wake();
}
-@@ -13449,6 +14393,7 @@ main(int argc, char *argv[])
+@@ -13449,6 +14614,7 @@ main(int argc, char *argv[])
}
}
@@ -17693,7 +19338,7 @@ index 269e3a888..29019809c 100644
"isRoot": true}}
}
diff --git a/ovn-nb.xml b/ovn-nb.xml
-index c9ab25ceb..5f5c2cda0 100644
+index c9ab25ceb..b85a51868 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -226,6 +226,21 @@
@@ -17718,7 +19363,7 @@ index c9ab25ceb..5f5c2cda0 100644
These options control how routes are advertised between OVN -@@ -1635,6 +1650,30 @@ +@@ -1635,6 +1650,42 @@ See External IDs at the beginning of this document.
true
, then neighbor routers will have logical
++ flows added that will allow for routing to the VIP IP. It also will
++ have ARP resolution logical flows added. By setting this option, it
++ means there is no reason to create a
++ <ref table="Logical_Router_Static_Route"/> from neighbor routers to
++ this VIP address. It also means that no ARP request is required for
++ neighbor routers to learn the IP-MAC mapping for this VIP IP. For
++ more information about what flows are added for IP routes, please
++ see the ovn-northd
manpage section on IP Routing.
++