From 7699c1043a3fec9eb215fc430202ca01846c505e Mon Sep 17 00:00:00 2001
Message-Id: <7699c1043a3fec9eb215fc430202ca01846c505e.1610458802.git.lorenzo.bianconi@redhat.com>
In-Reply-To: <f21c1b7a467a691847b5552d4570af706fcc5bb0.1610458802.git.lorenzo.bianconi@redhat.com>
References: <f21c1b7a467a691847b5552d4570af706fcc5bb0.1610458802.git.lorenzo.bianconi@redhat.com>
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Tue, 5 Jan 2021 17:49:38 +0000
Subject: [PATCH 11/16] ovn-northd: move NAT, Defrag and lb to a function.
Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
northd/ovn-northd.c | 4128 +++++++++++++++++++++----------------------
1 file changed, 2058 insertions(+), 2070 deletions(-)
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index f9b8d588b..f588d8c32 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -8923,2391 +8923,2380 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
ds_destroy(&actions);
}
+/* Logical router ingress Table 0: L2 Admission Control
+ * Generic admission control flows (without inport check).
+ */
static void
-build_lrouter_flows(struct hmap *datapaths,
- struct hmap *lflows, struct shash *meter_groups,
- struct hmap *lbs)
+build_adm_ctrl_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows)
{
- /* This flow table structure is documented in ovn-northd(8), so please
- * update ovn-northd.8.xml if you change anything. */
-
- struct ds match = DS_EMPTY_INITIALIZER;
- struct ds actions = DS_EMPTY_INITIALIZER;
+ if (od->nbr) {
+ /* Logical VLANs not supported.
+ * Broadcast/multicast source address is invalid. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
+ "vlan.present || eth.src[40]", "drop;");
+ }
+}
- struct ovn_datapath *od;
+/* Logical router ingress Table 0: L2 Admission Control
+ * This table drops packets that the router shouldn’t see at all based
+ * on their Ethernet headers.
+ */
+static void
+build_adm_ctrl_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ if (op->nbrp) {
+ if (!lrport_is_enabled(op->nbrp)) {
+ /* Drop packets from disabled logical ports (since logical flow
+ * tables are default-drop). */
+ return;
+ }
- /* NAT, Defrag and load balancing. */
- HMAP_FOR_EACH (od, key_node, datapaths) {
- if (!od->nbr) {
- continue;
+ if (op->derived) {
+ /* No ingress packets should be received on a chassisredirect
+ * port. */
+ return;
}
- /* Packets are allowed by default. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
+ /* Store the ethernet address of the port receiving the packet.
+ * This will save us from having to match on inport further down in
+ * the pipeline.
+ */
+ ds_clear(actions);
+ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
+ op->lrp_networks.ea_s);
- /* Send the IPv6 NS packets to next table. When ovn-controller
- * generates IPv6 NS (for the action - nd_ns{}), the injected
- * packet would go through conntrack - which is not required. */
- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
+ ds_clear(match);
+ ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
- /* NAT rules are only valid on Gateway routers and routers with
- * l3dgw_port (router has a port with gateway chassis
- * specified). */
- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
- continue;
+ ds_clear(match);
+ ds_put_format(match, "eth.dst == %s && inport == %s",
+ op->lrp_networks.ea_s, op->json_key);
+ if (op->od->l3dgw_port && op == op->od->l3dgw_port
+ && op->od->l3redirect_port) {
+ /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
+ * should only be received on the gateway chassis. */
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
+}
- struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
- bool dnat_force_snat_ip =
- !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
- bool lb_force_snat_ip =
- !lport_addresses_is_empty(&od->lb_force_snat_addrs);
+/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
+ * lflows for logical routers. */
+static void
+build_neigh_learning_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ if (od->nbr) {
- for (int i = 0; i < od->nbr->n_nat; i++) {
- const struct nbrec_nat *nat;
+ /* Learn MAC bindings from ARP/IPv6 ND.
+ *
+ * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
+ * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
+ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
+ * If "always_learn_from_arp_request" is set to false, it will also
+ * lookup for the (arp.spa) in the mac binding table using the
+ * "lookup_arp_ip" action for ARP request packets, and stores the
+ * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
+ * to "1" directly for ARP response packets.
+ *
+ * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
+ * for the (nd.target, nd.tll) in the mac binding table using the
+ * 'lookup_nd' action and stores the result in
+ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+ * "always_learn_from_arp_request" is set to false,
+ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
+ *
+ * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
+ * for the (ip6.src, nd.sll) in the mac binding table using the
+ * 'lookup_nd' action and stores the result in
+ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+ * "always_learn_from_arp_request" is set to false, it will also lookup
+ * for the (ip6.src) in the mac binding table using the "lookup_nd_ip"
+ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+ * bit.
+ *
+ * Table LEARN_NEIGHBOR learns the mac-binding using the action
+ * - 'put_arp/put_nd'. Learning mac-binding is skipped if
+ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or
+ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
+ *
+ * */
- nat = od->nbr->nat[i];
+ /* Flows for LOOKUP_NEIGHBOR. */
+ bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
+ "always_learn_from_arp_request", true);
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
+ learn_from_arp_request ? "" :
+ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+ "arp.op == 2", ds_cstr(actions));
- ovs_be32 ip, mask;
- struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
- bool is_v6 = false;
- bool stateless = lrouter_nat_is_stateless(nat);
- struct nbrec_address_set *allowed_ext_ips =
- nat->allowed_ext_ips;
- struct nbrec_address_set *exempted_ext_ips =
- nat->exempted_ext_ips;
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_nd(inport, nd.target, nd.tll); %snext;",
+ learn_from_arp_request ? "" :
+ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
+ ds_cstr(actions));
- if (allowed_ext_ips && exempted_ext_ips) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
- VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
- "both allowed and exempt external ips set",
- UUID_ARGS(&(nat->header_.uuid)));
- continue;
- }
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
+ learn_from_arp_request ? "" :
+ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+ " = lookup_nd_ip(inport, ip6.src); ");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
+ ds_cstr(actions));
- char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
- if (error || mask != OVS_BE32_MAX) {
- free(error);
- error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
- if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
- /* Invalid for both IPv4 and IPv6 */
- static struct vlog_rate_limit rl =
- VLOG_RATE_LIMIT_INIT(5, 1);
- VLOG_WARN_RL(&rl, "bad external ip %s for nat",
- nat->external_ip);
- free(error);
- continue;
- }
- /* It was an invalid IPv4 address, but valid IPv6.
- * Treat the rest of the handling of this NAT rule
- * as IPv6. */
- is_v6 = true;
- }
+ /* For other packet types, we can skip neighbor learning.
+ * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
+ REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
- /* Check the validity of nat->logical_ip. 'logical_ip' can
- * be a subnet when the type is "snat". */
- int cidr_bits;
- if (is_v6) {
- error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
- cidr_bits = ipv6_count_cidr_bits(&mask_v6);
- } else {
- error = ip_parse_masked(nat->logical_ip, &ip, &mask);
- cidr_bits = ip_count_cidr_bits(mask);
- }
- if (!strcmp(nat->type, "snat")) {
- if (error) {
- /* Invalid for both IPv4 and IPv6 */
- static struct vlog_rate_limit rl =
- VLOG_RATE_LIMIT_INIT(5, 1);
- VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
- "in router "UUID_FMT"",
- nat->logical_ip, UUID_ARGS(&od->key));
- free(error);
- continue;
- }
- } else {
- if (error || (!is_v6 && mask != OVS_BE32_MAX)
- || (is_v6 && memcmp(&mask_v6, &v6_exact,
- sizeof mask_v6))) {
- /* Invalid for both IPv4 and IPv6 */
- static struct vlog_rate_limit rl =
- VLOG_RATE_LIMIT_INIT(5, 1);
- VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
- ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
- free(error);
- continue;
- }
- }
+ /* Flows for LEARN_NEIGHBOR. */
+ /* Skip Neighbor learning if not required. */
+ ds_clear(match);
+ ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s",
+ learn_from_arp_request ? "" :
+ " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
+ ds_cstr(match), "next;");
- /* For distributed router NAT, determine whether this NAT rule
- * satisfies the conditions for distributed NAT processing. */
- bool distributed = false;
- struct eth_addr mac;
- if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
- nat->logical_port && nat->external_mac) {
- if (eth_addr_from_string(nat->external_mac, &mac)) {
- distributed = true;
- } else {
- static struct vlog_rate_limit rl =
- VLOG_RATE_LIMIT_INIT(5, 1);
- VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
- ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
- continue;
- }
- }
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+ "arp", "put_arp(inport, arp.spa, arp.sha); next;");
- /* Ingress UNSNAT table: It is for already established connections'
- * reverse traffic. i.e., SNAT has already been done in egress
- * pipeline and now the packet has entered the ingress pipeline as
- * part of a reply. We undo the SNAT here.
- *
- * Undoing SNAT has to happen before DNAT processing. This is
- * because when the packet was DNATed in ingress pipeline, it did
- * not know about the possibility of eventual additional SNAT in
- * egress pipeline. */
- if (!strcmp(nat->type, "snat")
- || !strcmp(nat->type, "dnat_and_snat")) {
- if (!od->l3dgw_port) {
- /* Gateway router. */
- ds_clear(&match);
- ds_clear(&actions);
- ds_put_format(&match, "ip && ip%s.dst == %s",
- is_v6 ? "6" : "4",
- nat->external_ip);
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.dst=%s; next;",
- is_v6 ? "6" : "4", nat->logical_ip);
- } else {
- ds_put_cstr(&actions, "ct_snat;");
- }
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+ "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
- 90, ds_cstr(&match),
- ds_cstr(&actions),
- &nat->header_);
- } else {
- /* Distributed router. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+ "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
+ }
- /* Traffic received on l3dgw_port is subject to NAT. */
- ds_clear(&match);
- ds_clear(&actions);
- ds_put_format(&match, "ip && ip%s.dst == %s"
- " && inport == %s",
- is_v6 ? "6" : "4",
- nat->external_ip,
- od->l3dgw_port->json_key);
- if (!distributed && od->l3redirect_port) {
- /* Flows for NAT rules that are centralized are only
- * programmed on the gateway chassis. */
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- }
+}
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.dst=%s; next;",
- is_v6 ? "6" : "4", nat->logical_ip);
- } else {
- ds_put_cstr(&actions, "ct_snat;");
- }
+/* Logical router ingress Table 1: Neighbor lookup lflows
+ * for logical router ports. */
+static void
+build_neigh_learning_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ if (op->nbrp) {
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
- 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
+ bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
+ "always_learn_from_arp_request", true);
+
+ /* Check if we need to learn mac-binding from ARP requests. */
+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+ if (!learn_from_arp_request) {
+ /* ARP request to this address should always get learned,
+ * so add a priority-110 flow to set
+ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
+ ds_clear(match);
+ ds_put_format(match,
+ "inport == %s && arp.spa == %s/%u && "
+ "arp.tpa == %s && arp.op == 1",
+ op->json_key,
+ op->lrp_networks.ipv4_addrs[i].network_s,
+ op->lrp_networks.ipv4_addrs[i].plen,
+ op->lrp_networks.ipv4_addrs[i].addr_s);
+ if (op->od->l3dgw_port && op == op->od->l3dgw_port
+ && op->od->l3redirect_port) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
+ const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_arp(inport, arp.spa, arp.sha); "
+ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
+ " next;";
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
+ ds_cstr(match), actions_s,
+ &op->nbrp->header_);
+ }
+ ds_clear(match);
+ ds_put_format(match,
+ "inport == %s && arp.spa == %s/%u && arp.op == 1",
+ op->json_key,
+ op->lrp_networks.ipv4_addrs[i].network_s,
+ op->lrp_networks.ipv4_addrs[i].plen);
+ if (op->od->l3dgw_port && op == op->od->l3dgw_port
+ && op->od->l3redirect_port) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
+ ds_clear(actions);
+ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+ " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
+ learn_from_arp_request ? "" :
+ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+ " = lookup_arp_ip(inport, arp.spa); ");
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
+ }
+}
- /* Ingress DNAT table: Packets enter the pipeline with destination
- * IP address that needs to be DNATted from a external IP address
- * to a logical IP address. */
- if (!strcmp(nat->type, "dnat")
- || !strcmp(nat->type, "dnat_and_snat")) {
- if (!od->l3dgw_port) {
- /* Gateway router. */
- /* Packet when it goes from the initiator to destination.
- * We need to set flags.loopback because the router can
- * send the packet back through the same interface. */
- ds_clear(&match);
- ds_put_format(&match, "ip && ip%s.dst == %s",
- is_v6 ? "6" : "4",
- nat->external_ip);
- ds_clear(&actions);
- if (allowed_ext_ips || exempted_ext_ips) {
- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
- is_v6, true, mask);
- }
+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
+ * Adv (RA) options and response. */
+static void
+build_ND_RA_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ if (!op->nbrp || op->nbrp->peer || !op->peer) {
+ return;
+ }
- if (dnat_force_snat_ip) {
- /* Indicate to the future tables that a DNAT has taken
- * place and a force SNAT needs to be done in the
- * Egress SNAT table. */
- ds_put_format(&actions,
- "flags.force_snat_for_dnat = 1; ");
- }
+ if (!op->lrp_networks.n_ipv6_addrs) {
+ return;
+ }
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "flags.loopback = 1; "
- "ip%s.dst=%s; next;",
- is_v6 ? "6" : "4", nat->logical_ip);
- } else {
- ds_put_format(&actions, "flags.loopback = 1; "
- "ct_dnat(%s", nat->logical_ip);
+ struct smap options;
+ smap_clone(&options, &op->sb->options);
- if (nat->external_port_range[0]) {
- ds_put_format(&actions, ",%s",
- nat->external_port_range);
- }
- ds_put_format(&actions, ");");
- }
+ /* enable IPv6 prefix delegation */
+ bool prefix_delegation = smap_get_bool(&op->nbrp->options,
+ "prefix_delegation", false);
+ if (!lrport_is_enabled(op->nbrp)) {
+ prefix_delegation = false;
+ }
+ smap_add(&options, "ipv6_prefix_delegation",
+ prefix_delegation ? "true" : "false");
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- } else {
- /* Distributed router. */
+ bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
+ "prefix", false);
+ if (!lrport_is_enabled(op->nbrp)) {
+ ipv6_prefix = false;
+ }
+ smap_add(&options, "ipv6_prefix",
+ ipv6_prefix ? "true" : "false");
+ sbrec_port_binding_set_options(op->sb, &options);
- /* Traffic received on l3dgw_port is subject to NAT. */
- ds_clear(&match);
- ds_put_format(&match, "ip && ip%s.dst == %s"
- " && inport == %s",
- is_v6 ? "6" : "4",
- nat->external_ip,
- od->l3dgw_port->json_key);
- if (!distributed && od->l3redirect_port) {
- /* Flows for NAT rules that are centralized are only
- * programmed on the gateway chassis. */
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- }
- ds_clear(&actions);
- if (allowed_ext_ips || exempted_ext_ips) {
- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
- is_v6, true, mask);
- }
+ smap_destroy(&options);
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.dst=%s; next;",
- is_v6 ? "6" : "4", nat->logical_ip);
- } else {
- ds_put_format(&actions, "ct_dnat(%s", nat->logical_ip);
- if (nat->external_port_range[0]) {
- ds_put_format(&actions, ",%s",
- nat->external_port_range);
- }
- ds_put_format(&actions, ");");
- }
+ const char *address_mode = smap_get(
+ &op->nbrp->ipv6_ra_configs, "address_mode");
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- }
- }
+ if (!address_mode) {
+ return;
+ }
+ if (strcmp(address_mode, "slaac") &&
+ strcmp(address_mode, "dhcpv6_stateful") &&
+ strcmp(address_mode, "dhcpv6_stateless")) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+ VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
+ address_mode);
+ return;
+ }
- /* ARP resolve for NAT IPs. */
- if (od->l3dgw_port) {
- if (!strcmp(nat->type, "snat")) {
- ds_clear(&match);
- ds_put_format(
- &match, "inport == %s && %s == %s",
- od->l3dgw_port->json_key,
- is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
- 120, ds_cstr(&match), "next;",
- &nat->header_);
- }
+ if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
+ false)) {
+ copy_ra_to_sb(op, address_mode);
+ }
- if (!sset_contains(&nat_entries, nat->external_ip)) {
- ds_clear(&match);
- ds_put_format(
- &match, "outport == %s && %s == %s",
- od->l3dgw_port->json_key,
- is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
- nat->external_ip);
- ds_clear(&actions);
- ds_put_format(
- &actions, "eth.dst = %s; next;",
- distributed ? nat->external_mac :
- od->l3dgw_port->lrp_networks.ea_s);
- ovn_lflow_add_with_hint(lflows, od,
- S_ROUTER_IN_ARP_RESOLVE,
- 100, ds_cstr(&match),
- ds_cstr(&actions),
- &nat->header_);
- sset_add(&nat_entries, nat->external_ip);
- }
- } else {
- /* Add the NAT external_ip to the nat_entries even for
- * gateway routers. This is required for adding load balancer
- * flows.*/
- sset_add(&nat_entries, nat->external_ip);
- }
+ ds_clear(match);
+ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
+ op->json_key);
+ ds_clear(actions);
- /* Egress UNDNAT table: It is for already established connections'
- * reverse traffic. i.e., DNAT has already been done in ingress
- * pipeline and now the packet has entered the egress pipeline as
- * part of a reply. We undo the DNAT here.
- *
- * Note that this only applies for NAT on a distributed router.
- * Undo DNAT on a gateway router is done in the ingress DNAT
- * pipeline stage. */
- if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
- || !strcmp(nat->type, "dnat_and_snat"))) {
- ds_clear(&match);
- ds_put_format(&match, "ip && ip%s.src == %s"
- " && outport == %s",
- is_v6 ? "6" : "4",
- nat->logical_ip,
- od->l3dgw_port->json_key);
- if (!distributed && od->l3redirect_port) {
- /* Flows for NAT rules that are centralized are only
- * programmed on the gateway chassis. */
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- }
- ds_clear(&actions);
- if (distributed) {
- ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
- ETH_ADDR_ARGS(mac));
- }
+ const char *mtu_s = smap_get(
+ &op->nbrp->ipv6_ra_configs, "mtu");
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.src=%s; next;",
- is_v6 ? "6" : "4", nat->external_ip);
- } else {
- ds_put_format(&actions, "ct_dnat;");
- }
+ /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
+ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- }
+ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
+ "addr_mode = \"%s\", slla = %s",
+ address_mode, op->lrp_networks.ea_s);
+ if (mtu > 0) {
+ ds_put_format(actions, ", mtu = %u", mtu);
+ }
- /* Egress SNAT table: Packets enter the egress pipeline with
- * source ip address that needs to be SNATted to a external ip
- * address. */
- if (!strcmp(nat->type, "snat")
- || !strcmp(nat->type, "dnat_and_snat")) {
- if (!od->l3dgw_port) {
- /* Gateway router. */
- ds_clear(&match);
- ds_put_format(&match, "ip && ip%s.src == %s",
- is_v6 ? "6" : "4",
- nat->logical_ip);
- ds_clear(&actions);
+ const char *prf = smap_get_def(
+ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
+ if (strcmp(prf, "MEDIUM")) {
+ ds_put_format(actions, ", router_preference = \"%s\"", prf);
+ }
- if (allowed_ext_ips || exempted_ext_ips) {
- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
- is_v6, false, mask);
- }
+ bool add_rs_response_flow = false;
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.src=%s; next;",
- is_v6 ? "6" : "4", nat->external_ip);
- } else {
- ds_put_format(&actions, "ct_snat(%s",
- nat->external_ip);
+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+ continue;
+ }
- if (nat->external_port_range[0]) {
- ds_put_format(&actions, ",%s",
- nat->external_port_range);
- }
- ds_put_format(&actions, ");");
- }
+ ds_put_format(actions, ", prefix = %s/%u",
+ op->lrp_networks.ipv6_addrs[i].network_s,
+ op->lrp_networks.ipv6_addrs[i].plen);
- /* The priority here is calculated such that the
- * nat->logical_ip with the longest mask gets a higher
- * priority. */
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
- cidr_bits + 1,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- } else {
- uint16_t priority = cidr_bits + 1;
+ add_rs_response_flow = true;
+ }
- /* Distributed router. */
- ds_clear(&match);
- ds_put_format(&match, "ip && ip%s.src == %s"
- " && outport == %s",
- is_v6 ? "6" : "4",
- nat->logical_ip,
- od->l3dgw_port->json_key);
- if (!distributed && od->l3redirect_port) {
- /* Flows for NAT rules that are centralized are only
- * programmed on the gateway chassis. */
- priority += 128;
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- }
- ds_clear(&actions);
+ if (add_rs_response_flow) {
+ ds_put_cstr(actions, "); next;");
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
+ 50, ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ ds_clear(actions);
+ ds_clear(match);
+ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
+ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
- if (allowed_ext_ips || exempted_ext_ips) {
- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
- is_v6, false, mask);
- }
+ char ip6_str[INET6_ADDRSTRLEN + 1];
+ struct in6_addr lla;
+ in6_generate_lla(op->lrp_networks.ea, &lla);
+ memset(ip6_str, 0, sizeof(ip6_str));
+ ipv6_string_mapped(ip6_str, &lla);
+ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
+ "ip6.dst = ip6.src; ip6.src = %s; "
+ "outport = inport; flags.loopback = 1; "
+ "output;",
+ op->lrp_networks.ea_s, ip6_str);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_ROUTER_IN_ND_RA_RESPONSE, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
+}
- if (distributed) {
- ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
- ETH_ADDR_ARGS(mac));
- }
+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
+ * responder, by default goto next. (priority 0). */
+static void
+build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
+{
+ if (od->nbr) {
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
+ }
+}
- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
- ds_put_format(&actions, "ip%s.src=%s; next;",
- is_v6 ? "6" : "4", nat->external_ip);
- } else {
- ds_put_format(&actions, "ct_snat(%s",
- nat->external_ip);
- if (nat->external_port_range[0]) {
- ds_put_format(&actions, ",%s",
- nat->external_port_range);
- }
- ds_put_format(&actions, ");");
- }
+/* Logical router ingress table IP_ROUTING : IP Routing.
+ *
+ * A packet that arrives at this table is an IP packet that should be
+ * routed to the address in 'ip[46].dst'.
+ *
+ * For regular routes without ECMP, table IP_ROUTING sets outport to the
+ * correct output port, eth.src to the output port's MAC address, and
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+ * advances to the next table.
+ *
+ * For ECMP routes, i.e. multiple routes with same policy and prefix, table
+ * IP_ROUTING remembers ECMP group id and selects a member id, and advances
+ * to table IP_ROUTING_ECMP, which sets outport, eth.src and
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
+ */
+static void
+build_ip_routing_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows)
+{
+ if (op->nbrp) {
- /* The priority here is calculated such that the
- * nat->logical_ip with the longest mask gets a higher
- * priority. */
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
- priority, ds_cstr(&match),
- ds_cstr(&actions),
- &nat->header_);
- }
- }
+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
+ op->lrp_networks.ipv4_addrs[i].network_s,
+ op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
+ &op->nbrp->header_);
+ }
- /* Logical router ingress table 0:
- * For NAT on a distributed router, add rules allowing
- * ingress traffic with eth.dst matching nat->external_mac
- * on the l3dgw_port instance where nat->logical_port is
- * resident. */
- if (distributed) {
- /* Store the ethernet address of the port receiving the packet.
- * This will save us from having to match on inport further
- * down in the pipeline.
- */
- ds_clear(&actions);
- ds_put_format(&actions, REG_INPORT_ETH_ADDR " = %s; next;",
- od->l3dgw_port->lrp_networks.ea_s);
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
+ op->lrp_networks.ipv6_addrs[i].network_s,
+ op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
+ &op->nbrp->header_);
+ }
+ }
+}
- ds_clear(&match);
- ds_put_format(&match,
- "eth.dst == "ETH_ADDR_FMT" && inport == %s"
- " && is_chassis_resident(\"%s\")",
- ETH_ADDR_ARGS(mac),
- od->l3dgw_port->json_key,
- nat->logical_port);
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
- }
+static void
+build_static_route_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows,
+ struct hmap *ports)
+{
+ if (od->nbr) {
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
+ REG_ECMP_GROUP_ID" == 0", "next;");
- /* Ingress Gateway Redirect Table: For NAT on a distributed
- * router, add flows that are specific to a NAT rule. These
- * flows indicate the presence of an applicable NAT rule that
- * can be applied in a distributed manner.
- * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to
- * NAT external IP and NAT external mac so the ARP request
- * generated in the following stage is sent out with proper IP/MAC
- * src addresses.
- */
- if (distributed) {
- ds_clear(&match);
- ds_clear(&actions);
- ds_put_format(&match,
- "ip%s.src == %s && outport == %s && "
- "is_chassis_resident(\"%s\")",
- is_v6 ? "6" : "4", nat->logical_ip,
- od->l3dgw_port->json_key, nat->logical_port);
- ds_put_format(&actions, "eth.src = %s; %s = %s; next;",
- nat->external_mac,
- is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
- nat->external_ip);
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
- 100, ds_cstr(&match),
- ds_cstr(&actions), &nat->header_);
+ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
+ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
+ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
+ struct ecmp_groups_node *group;
+ for (int i = 0; i < od->nbr->n_static_routes; i++) {
+ struct parsed_route *route =
+ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
+ if (!route) {
+ continue;
}
-
- /* Egress Loopback table: For NAT on a distributed router.
- * If packets in the egress pipeline on the distributed
- * gateway port have ip.dst matching a NAT external IP, then
- * loop a clone of the packet back to the beginning of the
- * ingress pipeline with inport = outport. */
- if (od->l3dgw_port) {
- /* Distributed router. */
- ds_clear(&match);
- ds_put_format(&match, "ip%s.dst == %s && outport == %s",
- is_v6 ? "6" : "4",
- nat->external_ip,
- od->l3dgw_port->json_key);
- if (!distributed) {
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- } else {
- ds_put_format(&match, " && is_chassis_resident(\"%s\")",
- nat->logical_port);
- }
-
- ds_clear(&actions);
- ds_put_format(&actions,
- "clone { ct_clear; "
- "inport = outport; outport = \"\"; "
- "flags = 0; flags.loopback = 1; ");
- for (int j = 0; j < MFF_N_LOG_REGS; j++) {
- ds_put_format(&actions, "reg%d = 0; ", j);
+ group = ecmp_groups_find(&ecmp_groups, route);
+ if (group) {
+ ecmp_groups_add_route(group, route);
+ } else {
+ const struct parsed_route *existed_route =
+ unique_routes_remove(&unique_routes, route);
+ if (existed_route) {
+ group = ecmp_groups_add(&ecmp_groups, existed_route);
+ if (group) {
+ ecmp_groups_add_route(group, route);
+ }
+ } else {
+ unique_routes_add(&unique_routes, route);
}
- ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
- "next(pipeline=ingress, table=%d); };",
- ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
- ds_cstr(&match), ds_cstr(&actions),
- &nat->header_);
}
}
-
- /* Handle force SNAT options set in the gateway router. */
- if (!od->l3dgw_port) {
- if (dnat_force_snat_ip) {
- if (od->dnat_force_snat_addrs.n_ipv4_addrs) {
- build_lrouter_force_snat_flows(lflows, od, "4",
- od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s,
- "dnat");
- }
- if (od->dnat_force_snat_addrs.n_ipv6_addrs) {
- build_lrouter_force_snat_flows(lflows, od, "6",
- od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s,
- "dnat");
- }
- }
- if (lb_force_snat_ip) {
- if (od->lb_force_snat_addrs.n_ipv4_addrs) {
- build_lrouter_force_snat_flows(lflows, od, "4",
- od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb");
- }
- if (od->lb_force_snat_addrs.n_ipv6_addrs) {
- build_lrouter_force_snat_flows(lflows, od, "6",
- od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
- }
- }
-
- /* For gateway router, re-circulate every packet through
- * the DNAT zone. This helps with the following.
- *
- * Any packet that needs to be unDNATed in the reverse
- * direction gets unDNATed. Ideally this could be done in
- * the egress pipeline. But since the gateway router
- * does not have any feature that depends on the source
- * ip address being external IP address for IP routing,
- * we can do it here, saving a future re-circulation. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
- "ip", "flags.loopback = 1; ct_dnat;");
+ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
+ /* add a flow in IP_ROUTING, and one flow for each member in
+ * IP_ROUTING_ECMP. */
+ build_ecmp_route_flow(lflows, od, ports, group);
}
-
- /* Load balancing and packet defrag are only valid on
- * Gateway routers or router with gateway port. */
- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
- sset_destroy(&nat_entries);
- continue;
+ const struct unique_routes_node *ur;
+ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
+ build_static_route_flow(lflows, od, ports, ur->route);
}
+ ecmp_groups_destroy(&ecmp_groups);
+ unique_routes_destroy(&unique_routes);
+ parsed_routes_destroy(&parsed_routes);
+ }
+}
- /* A set to hold all ips that need defragmentation and tracking. */
- struct sset all_ips = SSET_INITIALIZER(&all_ips);
+/* IP Multicast lookup. Here we set the output port, adjust TTL and
+ * advance to next table (priority 500).
+ */
+static void
+build_mcast_lookup_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ if (od->nbr) {
- for (int i = 0; i < od->nbr->n_load_balancer; i++) {
- struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i];
- struct ovn_northd_lb *lb =
- ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
- ovs_assert(lb);
+ /* Drop IPv6 multicast traffic that shouldn't be forwarded,
+ * i.e., router solicitation and router advertisement.
+ */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
+ "nd_rs || nd_ra", "drop;");
+ if (!od->mcast_info.rtr.relay) {
+ return;
+ }
- for (size_t j = 0; j < lb->n_vips; j++) {
- struct ovn_lb_vip *lb_vip = &lb->vips[j];
- struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j];
- ds_clear(&actions);
- build_lb_vip_actions(lb_vip, lb_vip_nb, &actions,
- lb->selection_fields, false);
+ struct ovn_igmp_group *igmp_group;
- if (!sset_contains(&all_ips, lb_vip->vip_str)) {
- sset_add(&all_ips, lb_vip->vip_str);
- /* If there are any load balancing rules, we should send
- * the packet to conntrack for defragmentation and
- * tracking. This helps with two things.
- *
- * 1. With tracking, we can send only new connections to
- * pick a DNAT ip address from a group.
- * 2. If there are L4 ports in load balancing rules, we
- * need the defragmentation to match on L4 ports. */
- ds_clear(&match);
- if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
- ds_put_format(&match, "ip && ip4.dst == %s",
- lb_vip->vip_str);
- } else {
- ds_put_format(&match, "ip && ip6.dst == %s",
- lb_vip->vip_str);
- }
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG,
- 100, ds_cstr(&match), "ct_next;",
- &nb_lb->header_);
- }
+ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
+ ds_clear(match);
+ ds_clear(actions);
+ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
+ ds_put_format(match, "ip4 && ip4.dst == %s ",
+ igmp_group->mcgroup.name);
+ } else {
+ ds_put_format(match, "ip6 && ip6.dst == %s ",
+ igmp_group->mcgroup.name);
+ }
+ if (od->mcast_info.rtr.flood_static) {
+ ds_put_cstr(actions,
+ "clone { "
+ "outport = \""MC_STATIC"\"; "
+ "ip.ttl--; "
+ "next; "
+ "};");
+ }
+ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
+ igmp_group->mcgroup.name);
+ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+ ds_cstr(match), ds_cstr(actions));
+ }
- /* Higher priority rules are added for load-balancing in DNAT
- * table. For every match (on a VIP[:port]), we add two flows
- * via add_router_lb_flow(). One flow is for specific matching
- * on ct.new with an action of "ct_lb($targets);". The other
- * flow is for ct.est with an action of "ct_dnat;". */
- ds_clear(&match);
- if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
- ds_put_format(&match, "ip && ip4.dst == %s",
- lb_vip->vip_str);
- } else {
- ds_put_format(&match, "ip && ip6.dst == %s",
- lb_vip->vip_str);
- }
+ /* If needed, flood unregistered multicast on statically configured
+ * ports. Otherwise drop any multicast traffic.
+ */
+ if (od->mcast_info.rtr.flood_static) {
+ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+ "ip4.mcast || ip6.mcast",
+ "clone { "
+ "outport = \""MC_STATIC"\"; "
+ "ip.ttl--; "
+ "next; "
+ "};");
+ } else {
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+ "ip4.mcast || ip6.mcast", "drop;");
+ }
+ }
+}
- int prio = 110;
- bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp");
- bool is_sctp = nullable_string_is_equal(nb_lb->protocol,
- "sctp");
- const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp";
+/* Logical router ingress table POLICY: Policy.
+ *
+ * A packet that arrives at this table is an IP packet that should be
+ * permitted/denied/rerouted to the address in the rule's nexthop.
+ * This table sets outport to the correct out_port,
+ * eth.src to the output port's MAC address,
+ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+ * advances to the next table for ARP/ND resolution. */
+static void
+build_ingress_policy_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows,
+ struct hmap *ports)
+{
+ if (od->nbr) {
+ /* This is a catch-all rule. It has the lowest priority (0),
+ * matches everything ("1") and passes the packet through (next). */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
+ REG_ECMP_GROUP_ID" = 0; next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
+ REG_ECMP_GROUP_ID" == 0", "next;");
- if (lb_vip->vip_port) {
- ds_put_format(&match, " && %s && %s.dst == %d", proto,
- proto, lb_vip->vip_port);
- prio = 120;
- }
+ /* Convert routing policies to flows. */
+ uint16_t ecmp_group_id = 1;
+ for (int i = 0; i < od->nbr->n_policies; i++) {
+ const struct nbrec_logical_router_policy *rule
+ = od->nbr->policies[i];
+ bool is_ecmp_reroute =
+ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
- if (od->l3redirect_port &&
- (lb_vip->n_backends || !lb_vip->empty_backend_rej)) {
- ds_put_format(&match, " && is_chassis_resident(%s)",
- od->l3redirect_port->json_key);
- }
- add_router_lb_flow(lflows, od, &match, &actions, prio,
- lb_force_snat_ip, lb_vip, proto,
- nb_lb, meter_groups, &nat_entries);
+ if (is_ecmp_reroute) {
+ build_ecmp_routing_policy_flows(lflows, od, ports, rule,
+ ecmp_group_id);
+ ecmp_group_id++;
+ } else {
+ build_routing_policy_flow(lflows, od, ports, rule,
+ &rule->header_);
}
}
- sset_destroy(&all_ips);
- sset_destroy(&nat_entries);
}
-
- ds_destroy(&match);
- ds_destroy(&actions);
}
-/* Logical router ingress Table 0: L2 Admission Control
- * Generic admission control flows (without inport check).
- */
+/* Logical router ingress table ARP_RESOLVE: ARP Resolution. */
static void
-build_adm_ctrl_flows_for_lrouter(
+build_arp_resolve_flows_for_lrouter(
struct ovn_datapath *od, struct hmap *lflows)
{
if (od->nbr) {
- /* Logical VLANs not supported.
- * Broadcast/multicast source address is invalid. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
- "vlan.present || eth.src[40]", "drop;");
+ /* Multicast packets already have the outport set so just advance to
+ * next table (priority 500). */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
+ "ip4.mcast || ip6.mcast", "next;");
+
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
+ "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
+ "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
}
}
-/* Logical router ingress Table 0: L2 Admission Control
- * This table drops packets that the router shouldn’t see at all based
- * on their Ethernet headers.
- */
-static void
-build_adm_ctrl_flows_for_lrouter_port(
+/* Logical router ingress table ARP_RESOLVE: ARP Resolution.
+ *
+ * Any unicast packet that reaches this table is an IP packet whose
+ * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
+ * (ip4.dst/ip6.dst is the final destination).
+ * This table resolves the IP address in
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
+ * an Ethernet address in eth.dst.
+ */
+static void
+build_arp_resolve_flows_for_lrouter_port(
struct ovn_port *op, struct hmap *lflows,
+ struct hmap *ports,
struct ds *match, struct ds *actions)
{
- if (op->nbrp) {
- if (!lrport_is_enabled(op->nbrp)) {
- /* Drop packets from disabled logical ports (since logical flow
- * tables are default-drop). */
- return;
- }
+ if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
+ return;
+ }
- if (op->derived) {
- /* No ingress packets should be received on a chassisredirect
- * port. */
- return;
- }
+ if (op->nbrp) {
+ /* This is a logical router port. If next-hop IP address in
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
+ * router port, then the packet is intended to eventually be sent
+ * to this logical port. Set the destination mac address using
+ * this port's mac address.
+ *
+ * The packet is still in peer's logical pipeline. So the match
+ * should be on peer's outport. */
+ if (op->peer && op->nbrp->peer) {
+ if (op->lrp_networks.n_ipv4_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 "== ",
+ op->peer->json_key);
+ op_put_v4_networks(match, op, false);
- /* Store the ethernet address of the port receiving the packet.
- * This will save us from having to match on inport further down in
- * the pipeline.
- */
- ds_clear(actions);
- ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
- op->lrp_networks.ea_s);
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ op->lrp_networks.ea_s);
+ ovn_lflow_add_with_hint(lflows, op->peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
- ds_clear(match);
- ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
+ if (op->lrp_networks.n_ipv6_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV6 " == ",
+ op->peer->json_key);
+ op_put_v6_networks(match, op);
- ds_clear(match);
- ds_put_format(match, "eth.dst == %s && inport == %s",
- op->lrp_networks.ea_s, op->json_key);
- if (op->od->l3dgw_port && op == op->od->l3dgw_port
- && op->od->l3redirect_port) {
- /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
- * should only be received on the gateway chassis. */
- ds_put_format(match, " && is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ op->lrp_networks.ea_s);
+ ovn_lflow_add_with_hint(lflows, op->peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
}
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
- }
-}
+ if (!op->derived && op->od->l3redirect_port) {
+ const char *redirect_type = smap_get(&op->nbrp->options,
+ "redirect-type");
+ if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
+ /* Packet is on a non-gateway chassis and
+ * has an unresolved ARP on a network behind the gateway
+ * chassis attached router port. Since the redirect type
+ * is "bridged", instead of calling "get_arp"
+ * on this node, we redirect the packet to the gateway
+ * chassis by setting eth.dst to the router port's MAC. */
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ "!is_chassis_resident(%s)", op->json_key,
+ op->od->l3redirect_port->json_key);
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ op->lrp_networks.ea_s);
-/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
- * lflows for logical routers. */
-static void
-build_neigh_learning_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (od->nbr) {
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_ROUTER_IN_ARP_RESOLVE, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
+ }
- /* Learn MAC bindings from ARP/IPv6 ND.
- *
- * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
- * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
- * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
- * If "always_learn_from_arp_request" is set to false, it will also
- * lookup for the (arp.spa) in the mac binding table using the
- * "lookup_arp_ip" action for ARP request packets, and stores the
- * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
- * to "1" directly for ARP response packets.
- *
- * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
- * for the (nd.target, nd.tll) in the mac binding table using the
- * 'lookup_nd' action and stores the result in
- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
- * "always_learn_from_arp_request" is set to false,
- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
- *
- * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
- * for the (ip6.src, nd.sll) in the mac binding table using the
- * 'lookup_nd' action and stores the result in
- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
- * "always_learn_from_arp_request" is set to false, it will also lookup
- * for the (ip6.src) in the mac binding table using the "lookup_nd_ip"
- * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
- * bit.
- *
- * Table LEARN_NEIGHBOR learns the mac-binding using the action
- * - 'put_arp/put_nd'. Learning mac-binding is skipped if
- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or
- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
+ /* Drop IP traffic destined to router owned IPs. Part of it is dropped
+ * in stage "lr_in_ip_input" but traffic that could have been unSNATed
+ * but didn't match any existing session might still end up here.
*
- * */
-
- /* Flows for LOOKUP_NEIGHBOR. */
- bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
- "always_learn_from_arp_request", true);
- ds_clear(actions);
- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
- learn_from_arp_request ? "" :
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
- "arp.op == 2", ds_cstr(actions));
+ * Priority 1.
+ */
+ build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
+ lflows);
+ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+ && strcmp(op->nbsp->type, "virtual")) {
+ /* This is a logical switch port that backs a VM or a container.
+ * Extract its addresses. For each of the address, go through all
+ * the router ports attached to the switch (to which this port
+ * connects) and if the address in question is reachable from the
+ * router port, add an ARP/ND entry in that router's pipeline. */
- ds_clear(actions);
- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_nd(inport, nd.target, nd.tll); %snext;",
- learn_from_arp_request ? "" :
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
- ds_cstr(actions));
+ for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+ const char *ea_s = op->lsp_addrs[i].ea_s;
+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
+ const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
+ for (size_t k = 0; k < op->od->n_router_ports; k++) {
+ /* Get the Logical_Router_Port that the
+ * Logical_Switch_Port is connected to, as
+ * 'peer'. */
+ const char *peer_name = smap_get(
+ &op->od->router_ports[k]->nbsp->options,
+ "router-port");
+ if (!peer_name) {
+ continue;
+ }
- ds_clear(actions);
- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
- learn_from_arp_request ? "" :
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
- " = lookup_nd_ip(inport, ip6.src); ");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
- ds_cstr(actions));
+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
- /* For other packet types, we can skip neighbor learning.
- * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
- REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
+ if (!find_lrp_member_ip(peer, ip_s)) {
+ continue;
+ }
- /* Flows for LEARN_NEIGHBOR. */
- /* Skip Neighbor learning if not required. */
- ds_clear(match);
- ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s",
- learn_from_arp_request ? "" :
- " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
- ds_cstr(match), "next;");
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 " == %s",
+ peer->json_key, ip_s);
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
- "arp", "put_arp(inport, arp.spa, arp.sha); next;");
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match),
+ ds_cstr(actions),
+ &op->nbsp->header_);
+ }
+ }
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
- "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+ const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
+ for (size_t k = 0; k < op->od->n_router_ports; k++) {
+ /* Get the Logical_Router_Port that the
+ * Logical_Switch_Port is connected to, as
+ * 'peer'. */
+ const char *peer_name = smap_get(
+ &op->od->router_ports[k]->nbsp->options,
+ "router-port");
+ if (!peer_name) {
+ continue;
+ }
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
- "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
- }
-
-}
+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
-/* Logical router ingress Table 1: Neighbor lookup lflows
- * for logical router ports. */
-static void
-build_neigh_learning_flows_for_lrouter_port(
- struct ovn_port *op, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (op->nbrp) {
+ if (!find_lrp_member_ip(peer, ip_s)) {
+ continue;
+ }
- bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
- "always_learn_from_arp_request", true);
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV6 " == %s",
+ peer->json_key, ip_s);
- /* Check if we need to learn mac-binding from ARP requests. */
- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
- if (!learn_from_arp_request) {
- /* ARP request to this address should always get learned,
- * so add a priority-110 flow to set
- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
- ds_clear(match);
- ds_put_format(match,
- "inport == %s && arp.spa == %s/%u && "
- "arp.tpa == %s && arp.op == 1",
- op->json_key,
- op->lrp_networks.ipv4_addrs[i].network_s,
- op->lrp_networks.ipv4_addrs[i].plen,
- op->lrp_networks.ipv4_addrs[i].addr_s);
- if (op->od->l3dgw_port && op == op->od->l3dgw_port
- && op->od->l3redirect_port) {
- ds_put_format(match, " && is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match),
+ ds_cstr(actions),
+ &op->nbsp->header_);
}
- const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_arp(inport, arp.spa, arp.sha); "
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
- " next;";
- ovn_lflow_add_with_hint(lflows, op->od,
- S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
- ds_cstr(match), actions_s,
- &op->nbrp->header_);
- }
- ds_clear(match);
- ds_put_format(match,
- "inport == %s && arp.spa == %s/%u && arp.op == 1",
- op->json_key,
- op->lrp_networks.ipv4_addrs[i].network_s,
- op->lrp_networks.ipv4_addrs[i].plen);
- if (op->od->l3dgw_port && op == op->od->l3dgw_port
- && op->od->l3redirect_port) {
- ds_put_format(match, " && is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
}
- ds_clear(actions);
- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
- " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
- learn_from_arp_request ? "" :
- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
- " = lookup_arp_ip(inport, arp.spa); ");
- ovn_lflow_add_with_hint(lflows, op->od,
- S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
}
- }
-}
-
-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
- * Adv (RA) options and response. */
-static void
-build_ND_RA_flows_for_lrouter_port(
- struct ovn_port *op, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (!op->nbrp || op->nbrp->peer || !op->peer) {
- return;
- }
+ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+ && !strcmp(op->nbsp->type, "virtual")) {
+ /* This is a virtual port. Add ARP replies for the virtual ip with
+ * the mac of the present active virtual parent.
+ * If the logical port doesn't have virtual parent set in
+ * Port_Binding table, then add the flow to set eth.dst to
+ * 00:00:00:00:00:00 and advance to next table so that ARP is
+ * resolved by router pipeline using the arp{} action.
+ * The MAC_Binding entry for the virtual ip might be invalid. */
+ ovs_be32 ip;
- if (!op->lrp_networks.n_ipv6_addrs) {
- return;
- }
+ const char *vip = smap_get(&op->nbsp->options,
+ "virtual-ip");
+ const char *virtual_parents = smap_get(&op->nbsp->options,
+ "virtual-parents");
+ if (!vip || !virtual_parents ||
+ !ip_parse(vip, &ip) || !op->sb) {
+ return;
+ }
- struct smap options;
- smap_clone(&options, &op->sb->options);
+ if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
+ !op->sb->chassis) {
+ /* The virtual port is not claimed yet. */
+ for (size_t i = 0; i < op->od->n_router_ports; i++) {
+ const char *peer_name = smap_get(
+ &op->od->router_ports[i]->nbsp->options,
+ "router-port");
+ if (!peer_name) {
+ continue;
+ }
- /* enable IPv6 prefix delegation */
- bool prefix_delegation = smap_get_bool(&op->nbrp->options,
- "prefix_delegation", false);
- if (!lrport_is_enabled(op->nbrp)) {
- prefix_delegation = false;
- }
- smap_add(&options, "ipv6_prefix_delegation",
- prefix_delegation ? "true" : "false");
+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
- bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
- "prefix", false);
- if (!lrport_is_enabled(op->nbrp)) {
- ipv6_prefix = false;
- }
- smap_add(&options, "ipv6_prefix",
- ipv6_prefix ? "true" : "false");
- sbrec_port_binding_set_options(op->sb, &options);
+ if (find_lrp_member_ip(peer, vip)) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 " == %s",
+ peer->json_key, vip);
- smap_destroy(&options);
+ const char *arp_actions =
+ "eth.dst = 00:00:00:00:00:00; next;";
+ ovn_lflow_add_with_hint(lflows, peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match),
+ arp_actions,
+ &op->nbsp->header_);
+ break;
+ }
+ }
+ } else {
+ struct ovn_port *vp =
+ ovn_port_find(ports, op->sb->virtual_parent);
+ if (!vp || !vp->nbsp) {
+ return;
+ }
- const char *address_mode = smap_get(
- &op->nbrp->ipv6_ra_configs, "address_mode");
+ for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
+ bool found_vip_network = false;
+ const char *ea_s = vp->lsp_addrs[i].ea_s;
+ for (size_t j = 0; j < vp->od->n_router_ports; j++) {
+ /* Get the Logical_Router_Port that the
+ * Logical_Switch_Port is connected to, as
+ * 'peer'. */
+ const char *peer_name = smap_get(
+ &vp->od->router_ports[j]->nbsp->options,
+ "router-port");
+ if (!peer_name) {
+ continue;
+ }
- if (!address_mode) {
- return;
- }
- if (strcmp(address_mode, "slaac") &&
- strcmp(address_mode, "dhcpv6_stateful") &&
- strcmp(address_mode, "dhcpv6_stateless")) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
- address_mode);
- return;
- }
+ struct ovn_port *peer =
+ ovn_port_find(ports, peer_name);
+ if (!peer || !peer->nbrp) {
+ continue;
+ }
- if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
- false)) {
- copy_ra_to_sb(op, address_mode);
- }
+ if (!find_lrp_member_ip(peer, vip)) {
+ continue;
+ }
- ds_clear(match);
- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
- op->json_key);
- ds_clear(actions);
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 " == %s",
+ peer->json_key, vip);
- const char *mtu_s = smap_get(
- &op->nbrp->ipv6_ra_configs, "mtu");
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match),
+ ds_cstr(actions),
+ &op->nbsp->header_);
+ found_vip_network = true;
+ break;
+ }
- /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
- uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
+ if (found_vip_network) {
+ break;
+ }
+ }
+ }
+ } else if (lsp_is_router(op->nbsp)) {
+ /* This is a logical switch port that connects to a router. */
- ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
- "addr_mode = \"%s\", slla = %s",
- address_mode, op->lrp_networks.ea_s);
- if (mtu > 0) {
- ds_put_format(actions, ", mtu = %u", mtu);
- }
+ /* The peer of this switch port is the router port for which
+ * we need to add logical flows such that it can resolve
+ * ARP entries for all the other router ports connected to
+ * the switch in question. */
- const char *prf = smap_get_def(
- &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
- if (strcmp(prf, "MEDIUM")) {
- ds_put_format(actions, ", router_preference = \"%s\"", prf);
- }
+ const char *peer_name = smap_get(&op->nbsp->options,
+ "router-port");
+ if (!peer_name) {
+ return;
+ }
- bool add_rs_response_flow = false;
+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
+ if (!peer || !peer->nbrp) {
+ return;
+ }
- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
- continue;
+ if (peer->od->nbr &&
+ smap_get_bool(&peer->od->nbr->options,
+ "dynamic_neigh_routers", false)) {
+ return;
}
- ds_put_format(actions, ", prefix = %s/%u",
- op->lrp_networks.ipv6_addrs[i].network_s,
- op->lrp_networks.ipv6_addrs[i].plen);
+ for (size_t i = 0; i < op->od->n_router_ports; i++) {
+ const char *router_port_name = smap_get(
+ &op->od->router_ports[i]->nbsp->options,
+ "router-port");
+ struct ovn_port *router_port = ovn_port_find(ports,
+ router_port_name);
+ if (!router_port || !router_port->nbrp) {
+ continue;
+ }
- add_rs_response_flow = true;
- }
+ /* Skip the router port under consideration. */
+ if (router_port == peer) {
+ continue;
+ }
- if (add_rs_response_flow) {
- ds_put_cstr(actions, "); next;");
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
- 50, ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
- ds_clear(actions);
- ds_clear(match);
- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
- "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+ if (router_port->lrp_networks.n_ipv4_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV4 " == ",
+ peer->json_key);
+ op_put_v4_networks(match, router_port, false);
- char ip6_str[INET6_ADDRSTRLEN + 1];
- struct in6_addr lla;
- in6_generate_lla(op->lrp_networks.ea, &lla);
- memset(ip6_str, 0, sizeof(ip6_str));
- ipv6_string_mapped(ip6_str, &lla);
- ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
- "ip6.dst = ip6.src; ip6.src = %s; "
- "outport = inport; flags.loopback = 1; "
- "output;",
- op->lrp_networks.ea_s, ip6_str);
- ovn_lflow_add_with_hint(lflows, op->od,
- S_ROUTER_IN_ND_RA_RESPONSE, 50,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
- }
-}
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ router_port->lrp_networks.ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbsp->header_);
+ }
-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
- * responder, by default goto next. (priority 0). */
-static void
-build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
-{
- if (od->nbr) {
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
+ if (router_port->lrp_networks.n_ipv6_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "outport == %s && "
+ REG_NEXT_HOP_IPV6 " == ",
+ peer->json_key);
+ op_put_v6_networks(match, router_port);
+
+ ds_clear(actions);
+ ds_put_format(actions, "eth.dst = %s; next;",
+ router_port->lrp_networks.ea_s);
+ ovn_lflow_add_with_hint(lflows, peer->od,
+ S_ROUTER_IN_ARP_RESOLVE, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbsp->header_);
+ }
+ }
}
+
}
-/* Logical router ingress table IP_ROUTING : IP Routing.
+/* Logical router ingress table CHK_PKT_LEN: Check packet length.
*
- * A packet that arrives at this table is an IP packet that should be
- * routed to the address in 'ip[46].dst'.
+ * For any IPv4 packet with outport set to the distributed gateway
+ * router port, check the packet length and store the result in the
+ * 'REGBIT_PKT_LARGER' register bit.
*
- * For regular routes without ECMP, table IP_ROUTING sets outport to the
- * correct output port, eth.src to the output port's MAC address, and
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
- * advances to the next table.
+ * Logical router ingress table LARGER_PKTS: Handle larger packets.
*
- * For ECMP routes, i.e. multiple routes with same policy and prefix, table
- * IP_ROUTING remembers ECMP group id and selects a member id, and advances
- * to table IP_ROUTING_ECMP, which sets outport, eth.src and
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
- */
+ * For any IPv4 packet with outport set to the distributed gateway
+ * router port and the 'REGBIT_PKT_LARGER' register bit set,
+ * generate an ICMPv4 packet with type 3 (Destination Unreachable) and
+ * code 4 (Fragmentation needed).
+ * */
static void
-build_ip_routing_flows_for_lrouter_port(
- struct ovn_port *op, struct hmap *lflows)
+build_check_pkt_len_flows_for_lrouter(
+ struct ovn_datapath *od, struct hmap *lflows,
+ struct hmap *ports,
+ struct ds *match, struct ds *actions)
{
- if (op->nbrp) {
+ if (od->nbr) {
- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
- add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
- op->lrp_networks.ipv4_addrs[i].network_s,
- op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
- &op->nbrp->header_);
- }
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
+ "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
+ "next;");
- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
- add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
- op->lrp_networks.ipv6_addrs[i].network_s,
- op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
- &op->nbrp->header_);
+ if (od->l3dgw_port && od->l3redirect_port) {
+ int gw_mtu = 0;
+ if (od->l3dgw_port->nbrp) {
+ gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
+ "gateway_mtu", 0);
+ }
+ /* Add the flows only if gateway_mtu is configured. */
+ if (gw_mtu <= 0) {
+ return;
+ }
+
+ ds_clear(match);
+ ds_put_format(match, "outport == %s", od->l3dgw_port->json_key);
+
+ ds_clear(actions);
+ ds_put_format(actions,
+ REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
+ " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &od->l3dgw_port->nbrp->header_);
+
+ for (size_t i = 0; i < od->nbr->n_ports; i++) {
+ struct ovn_port *rp = ovn_port_find(ports,
+ od->nbr->ports[i]->name);
+ if (!rp || rp == od->l3dgw_port) {
+ continue;
+ }
+
+ if (rp->lrp_networks.ipv4_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "inport == %s && outport == %s"
+ " && ip4 && "REGBIT_PKT_LARGER,
+ rp->json_key, od->l3dgw_port->json_key);
+
+ ds_clear(actions);
+ /* Set icmp4.frag_mtu to gw_mtu */
+ ds_put_format(actions,
+ "icmp4_error {"
+ REGBIT_EGRESS_LOOPBACK" = 1; "
+ "eth.dst = %s; "
+ "ip4.dst = ip4.src; "
+ "ip4.src = %s; "
+ "ip.ttl = 255; "
+ "icmp4.type = 3; /* Destination Unreachable. */ "
+ "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+ "icmp4.frag_mtu = %d; "
+ "next(pipeline=ingress, table=%d); };",
+ rp->lrp_networks.ea_s,
+ rp->lrp_networks.ipv4_addrs[0].addr_s,
+ gw_mtu,
+ ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+ ovn_lflow_add_with_hint(lflows, od,
+ S_ROUTER_IN_LARGER_PKTS, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &rp->nbrp->header_);
+ }
+
+ if (rp->lrp_networks.ipv6_addrs) {
+ ds_clear(match);
+ ds_put_format(match, "inport == %s && outport == %s"
+ " && ip6 && "REGBIT_PKT_LARGER,
+ rp->json_key, od->l3dgw_port->json_key);
+
+ ds_clear(actions);
+ /* Set icmp6.frag_mtu to gw_mtu */
+ ds_put_format(actions,
+ "icmp6_error {"
+ REGBIT_EGRESS_LOOPBACK" = 1; "
+ "eth.dst = %s; "
+ "ip6.dst = ip6.src; "
+ "ip6.src = %s; "
+ "ip.ttl = 255; "
+ "icmp6.type = 2; /* Packet Too Big. */ "
+ "icmp6.code = 0; "
+ "icmp6.frag_mtu = %d; "
+ "next(pipeline=ingress, table=%d); };",
+ rp->lrp_networks.ea_s,
+ rp->lrp_networks.ipv6_addrs[0].addr_s,
+ gw_mtu,
+ ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+ ovn_lflow_add_with_hint(lflows, od,
+ S_ROUTER_IN_LARGER_PKTS, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &rp->nbrp->header_);
+ }
+ }
}
}
}
+/* Logical router ingress table GW_REDIRECT: Gateway redirect.
+ *
+ * For traffic with outport equal to the l3dgw_port
+ * on a distributed router, this table redirects a subset
+ * of the traffic to the l3redirect_port which represents
+ * the central instance of the l3dgw_port.
+ */
static void
-build_static_route_flows_for_lrouter(
+build_gateway_redirect_flows_for_lrouter(
struct ovn_datapath *od, struct hmap *lflows,
- struct hmap *ports)
+ struct ds *match, struct ds *actions)
{
if (od->nbr) {
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
- REG_ECMP_GROUP_ID" == 0", "next;");
+ if (od->l3dgw_port && od->l3redirect_port) {
+ const struct ovsdb_idl_row *stage_hint = NULL;
- struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
- struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
- struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
- struct ecmp_groups_node *group;
- for (int i = 0; i < od->nbr->n_static_routes; i++) {
- struct parsed_route *route =
- parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
- if (!route) {
- continue;
+ if (od->l3dgw_port->nbrp) {
+ stage_hint = &od->l3dgw_port->nbrp->header_;
}
- group = ecmp_groups_find(&ecmp_groups, route);
- if (group) {
- ecmp_groups_add_route(group, route);
- } else {
- const struct parsed_route *existed_route =
- unique_routes_remove(&unique_routes, route);
- if (existed_route) {
- group = ecmp_groups_add(&ecmp_groups, existed_route);
- if (group) {
- ecmp_groups_add_route(group, route);
- }
- } else {
- unique_routes_add(&unique_routes, route);
- }
- }
- }
- HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
- /* add a flow in IP_ROUTING, and one flow for each member in
- * IP_ROUTING_ECMP. */
- build_ecmp_route_flow(lflows, od, ports, group);
- }
- const struct unique_routes_node *ur;
- HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
- build_static_route_flow(lflows, od, ports, ur->route);
+
+ /* For traffic with outport == l3dgw_port, if the
+ * packet did not match any higher priority redirect
+ * rule, then the traffic is redirected to the central
+ * instance of the l3dgw_port. */
+ ds_clear(match);
+ ds_put_format(match, "outport == %s",
+ od->l3dgw_port->json_key);
+ ds_clear(actions);
+ ds_put_format(actions, "outport = %s; next;",
+ od->l3redirect_port->json_key);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
+ ds_cstr(match), ds_cstr(actions),
+ stage_hint);
}
- ecmp_groups_destroy(&ecmp_groups);
- unique_routes_destroy(&unique_routes);
- parsed_routes_destroy(&parsed_routes);
+
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
}
}
-/* IP Multicast lookup. Here we set the output port, adjust TTL and
- * advance to next table (priority 500).
- */
+/* Logical router ingress table ARP_REQUEST: ARP request.
+ *
+ * In the common case where the Ethernet destination has been resolved,
+ * this table outputs the packet (priority 0). Otherwise, it composes
+ * and sends an ARP request or IPv6 neighbor solicitation (priority 100). */
static void
-build_mcast_lookup_flows_for_lrouter(
+build_arp_request_flows_for_lrouter(
struct ovn_datapath *od, struct hmap *lflows,
struct ds *match, struct ds *actions)
{
if (od->nbr) {
+ for (int i = 0; i < od->nbr->n_static_routes; i++) {
+ const struct nbrec_logical_router_static_route *route;
- /* Drop IPv6 multicast traffic that shouldn't be forwarded,
- * i.e., router solicitation and router advertisement.
- */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
- "nd_rs || nd_ra", "drop;");
- if (!od->mcast_info.rtr.relay) {
- return;
- }
-
- struct ovn_igmp_group *igmp_group;
+ route = od->nbr->static_routes[i];
+ struct in6_addr gw_ip6;
+ unsigned int plen;
+ char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
+ if (error || plen != 128) {
+ free(error);
+ continue;
+ }
- LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
ds_clear(match);
+ ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
+ "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
+ route->nexthop);
+ struct in6_addr sn_addr;
+ struct eth_addr eth_dst;
+ in6_addr_solicited_node(&sn_addr, &gw_ip6);
+ ipv6_multicast_to_ethernet(&eth_dst, &sn_addr);
+
+ char sn_addr_s[INET6_ADDRSTRLEN + 1];
+ ipv6_string_mapped(sn_addr_s, &sn_addr);
+
ds_clear(actions);
- if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
- ds_put_format(match, "ip4 && ip4.dst == %s ",
- igmp_group->mcgroup.name);
- } else {
- ds_put_format(match, "ip6 && ip6.dst == %s ",
- igmp_group->mcgroup.name);
- }
- if (od->mcast_info.rtr.flood_static) {
- ds_put_cstr(actions,
- "clone { "
- "outport = \""MC_STATIC"\"; "
- "ip.ttl--; "
- "next; "
- "};");
- }
- ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
- igmp_group->mcgroup.name);
- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
- ds_cstr(match), ds_cstr(actions));
- }
+ ds_put_format(actions,
+ "nd_ns { "
+ "eth.dst = "ETH_ADDR_FMT"; "
+ "ip6.dst = %s; "
+ "nd.target = %s; "
+ "output; "
+ "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
+ route->nexthop);
- /* If needed, flood unregistered multicast on statically configured
- * ports. Otherwise drop any multicast traffic.
- */
- if (od->mcast_info.rtr.flood_static) {
- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
- "ip4.mcast || ip6.mcast",
- "clone { "
- "outport = \""MC_STATIC"\"; "
- "ip.ttl--; "
- "next; "
- "};");
- } else {
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
- "ip4.mcast || ip6.mcast", "drop;");
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
+ ds_cstr(match), ds_cstr(actions),
+ &route->header_);
}
+
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+ "eth.dst == 00:00:00:00:00:00 && ip4",
+ "arp { "
+ "eth.dst = ff:ff:ff:ff:ff:ff; "
+ "arp.spa = " REG_SRC_IPV4 "; "
+ "arp.tpa = " REG_NEXT_HOP_IPV4 "; "
+ "arp.op = 1; " /* ARP request */
+ "output; "
+ "};");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+ "eth.dst == 00:00:00:00:00:00 && ip6",
+ "nd_ns { "
+ "nd.target = " REG_NEXT_HOP_IPV6 "; "
+ "output; "
+ "};");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
}
}
-/* Logical router ingress table POLICY: Policy.
+/* Logical router egress table DELIVERY: Delivery (priority 100-110).
*
- * A packet that arrives at this table is an IP packet that should be
- * permitted/denied/rerouted to the address in the rule's nexthop.
- * This table sets outport to the correct out_port,
- * eth.src to the output port's MAC address,
- * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
- * advances to the next table for ARP/ND resolution. */
+ * Priority 100 rules deliver packets to enabled logical ports.
+ * Priority 110 rules match multicast packets and update the source
+ * mac before delivering to enabled logical ports. IP multicast traffic
+ * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
+ */
static void
-build_ingress_policy_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows,
- struct hmap *ports)
+build_egress_delivery_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
{
- if (od->nbr) {
- /* This is a catch-all rule. It has the lowest priority (0)
- * does a match-all("1") and pass-through (next) */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
- REG_ECMP_GROUP_ID" = 0; next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
- REG_ECMP_GROUP_ID" == 0", "next;");
+ if (op->nbrp) {
+ if (!lrport_is_enabled(op->nbrp)) {
+ /* Drop packets to disabled logical ports (since logical flow
+ * tables are default-drop). */
+ return;
+ }
- /* Convert routing policies to flows. */
- uint16_t ecmp_group_id = 1;
- for (int i = 0; i < od->nbr->n_policies; i++) {
- const struct nbrec_logical_router_policy *rule
- = od->nbr->policies[i];
- bool is_ecmp_reroute =
- (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
+ if (op->derived) {
+ /* No egress packets should be processed in the context of
+ * a chassisredirect port. The chassisredirect port should
+ * be replaced by the l3dgw port in the local output
+ * pipeline stage before egress processing. */
+ return;
+ }
- if (is_ecmp_reroute) {
- build_ecmp_routing_policy_flows(lflows, od, ports, rule,
- ecmp_group_id);
- ecmp_group_id++;
- } else {
- build_routing_policy_flow(lflows, od, ports, rule,
- &rule->header_);
- }
+ /* If multicast relay is enabled then also adjust source mac for IP
+ * multicast traffic.
+ */
+ if (op->od->mcast_info.rtr.relay) {
+ ds_clear(match);
+ ds_clear(actions);
+ ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s",
+ op->json_key);
+ ds_put_format(actions, "eth.src = %s; output;",
+ op->lrp_networks.ea_s);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
+ ds_cstr(match), ds_cstr(actions));
}
+
+ ds_clear(match);
+ ds_put_format(match, "outport == %s", op->json_key);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
+ ds_cstr(match), "output;");
}
+
}
-/* Local router ingress table ARP_RESOLVE: ARP Resolution. */
static void
-build_arp_resolve_flows_for_lrouter(
+build_misc_local_traffic_drop_flows_for_lrouter(
struct ovn_datapath *od, struct hmap *lflows)
{
if (od->nbr) {
- /* Multicast packets already have the outport set so just advance to
- * next table (priority 500). */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
- "ip4.mcast || ip6.mcast", "next;");
+ /* L3 admission control: drop multicast and broadcast source, localhost
+ * source or destination, and zero network source or destination
+ * (priority 100). */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
+ "ip4.src_mcast ||"
+ "ip4.src == 255.255.255.255 || "
+ "ip4.src == 127.0.0.0/8 || "
+ "ip4.dst == 127.0.0.0/8 || "
+ "ip4.src == 0.0.0.0/8 || "
+ "ip4.dst == 0.0.0.0/8",
+ "drop;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
- "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+ /* Drop ARP packets (priority 85). ARP request packets for router's own
+ * IPs are handled with priority-90 flows.
+ * Drop IPv6 ND packets (priority 85). ND NA packets for router's own
+ * IPs are handled with priority-90 flows.
+ */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
+ "arp || nd", "drop;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
- "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
- }
-}
+ /* Allow IPv6 multicast traffic that's supposed to reach the
+ * router pipeline (e.g., router solicitations).
+ */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra",
+ "next;");
+
+ /* Drop other reserved multicast. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83,
+ "ip6.mcast_rsvd", "drop;");
+
+ /* Allow other multicast if relay enabled (priority 82). */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
+ "ip4.mcast || ip6.mcast",
+ od->mcast_info.rtr.relay ? "next;" : "drop;");
+
+ /* Drop Ethernet local broadcast. By definition this traffic should
+ * not be forwarded.*/
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
+ "eth.bcast", "drop;");
+
+ /* TTL discard */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
+ "ip4 && ip.ttl == {0, 1}", "drop;");
+
+ /* Pass other traffic not already handled to the next table for
+ * routing. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
+ }
+}
-/* Local router ingress table ARP_RESOLVE: ARP Resolution.
- *
- * Any unicast packet that reaches this table is an IP packet whose
- * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
- * (ip4.dst/ipv6.dst is the final destination).
- * This table resolves the IP address in
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
- * an Ethernet address in eth.dst.
- */
static void
-build_arp_resolve_flows_for_lrouter_port(
+build_dhcpv6_reply_flows_for_lrouter_port(
struct ovn_port *op, struct hmap *lflows,
- struct hmap *ports,
- struct ds *match, struct ds *actions)
+ struct ds *match)
{
- if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
- return;
+ if (op->nbrp && (!op->derived)) {
+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ ds_clear(match);
+ ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&"
+ " udp.dst == 546",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+ ds_cstr(match),
+ "reg0 = 0; handle_dhcpv6_reply;");
+ }
}
- if (op->nbrp) {
- /* This is a logical router port. If next-hop IP address in
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
- * router port, then the packet is intended to eventually be sent
- * to this logical port. Set the destination mac address using
- * this port's mac address.
- *
- * The packet is still in peer's logical pipeline. So the match
- * should be on peer's outport. */
- if (op->peer && op->nbrp->peer) {
- if (op->lrp_networks.n_ipv4_addrs) {
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV4 "== ",
- op->peer->json_key);
- op_put_v4_networks(match, op, false);
+}
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;",
- op->lrp_networks.ea_s);
- ovn_lflow_add_with_hint(lflows, op->peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
- }
+static void
+build_ipv6_input_flows_for_lrouter_port(
+ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ if (op->nbrp && (!op->derived)) {
+ /* No ingress packets are accepted on a chassisredirect
+ * port, so no need to program flows for that port. */
+ if (op->lrp_networks.n_ipv6_addrs) {
+ /* ICMPv6 echo reply. These flows reply to echo requests
+ * received for the router's IP address. */
+ ds_clear(match);
+ ds_put_cstr(match, "ip6.dst == ");
+ op_put_v6_networks(match, op);
+ ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
- if (op->lrp_networks.n_ipv6_addrs) {
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV6 " == ",
- op->peer->json_key);
- op_put_v6_networks(match, op);
+ const char *lrp_actions =
+ "ip6.dst <-> ip6.src; "
+ "ip.ttl = 255; "
+ "icmp6.type = 129; "
+ "flags.loopback = 1; "
+ "next; ";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+ ds_cstr(match), lrp_actions,
+ &op->nbrp->header_);
+ }
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;",
- op->lrp_networks.ea_s);
- ovn_lflow_add_with_hint(lflows, op->peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
+ /* ND reply. These flows reply to ND solicitations for the
+ * router's own IP address. */
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ ds_clear(match);
+ if (op->od->l3dgw_port && op == op->od->l3dgw_port
+ && op->od->l3redirect_port) {
+ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+ * should only be sent from the gateway chassis, so that
+ * upstream MAC learning points to the gateway chassis.
+ * Also need to avoid generation of multiple ND replies
+ * from different chassis. */
+ ds_put_format(match, "is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
+
+ build_lrouter_nd_flow(op->od, op, "nd_na_router",
+ op->lrp_networks.ipv6_addrs[i].addr_s,
+ op->lrp_networks.ipv6_addrs[i].sn_addr_s,
+ REG_INPORT_ETH_ADDR, match, false, 90,
+ &op->nbrp->header_, lflows);
}
- if (!op->derived && op->od->l3redirect_port) {
- const char *redirect_type = smap_get(&op->nbrp->options,
- "redirect-type");
- if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
- /* Packet is on a non gateway chassis and
- * has an unresolved ARP on a network behind gateway
- * chassis attached router port. Since, redirect type
- * is "bridged", instead of calling "get_arp"
- * on this node, we will redirect the packet to gateway
- * chassis, by setting destination mac router port mac.*/
+ /* UDP/TCP port unreachable */
+ if (!smap_get(&op->od->nbr->options, "chassis")
+ && !op->od->l3dgw_port) {
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
ds_clear(match);
- ds_put_format(match, "outport == %s && "
- "!is_chassis_resident(%s)", op->json_key,
- op->od->l3redirect_port->json_key);
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;",
- op->lrp_networks.ea_s);
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
+ const char *action = "tcp_reset {"
+ "eth.dst <-> eth.src; "
+ "ip6.dst <-> ip6.src; "
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 80, ds_cstr(match), action,
+ &op->nbrp->header_);
- ovn_lflow_add_with_hint(lflows, op->od,
- S_ROUTER_IN_ARP_RESOLVE, 50,
- ds_cstr(match), ds_cstr(actions),
+ ds_clear(match);
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag && udp",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
+ action = "icmp6 {"
+ "eth.dst <-> eth.src; "
+ "ip6.dst <-> ip6.src; "
+ "ip.ttl = 255; "
+ "icmp6.type = 1; "
+ "icmp6.code = 4; "
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 80, ds_cstr(match), action,
+ &op->nbrp->header_);
+
+ ds_clear(match);
+ ds_put_format(match,
+ "ip6 && ip6.dst == %s && !ip.later_frag",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
+ action = "icmp6 {"
+ "eth.dst <-> eth.src; "
+ "ip6.dst <-> ip6.src; "
+ "ip.ttl = 255; "
+ "icmp6.type = 1; "
+ "icmp6.code = 3; "
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 70, ds_cstr(match), action,
&op->nbrp->header_);
}
}
- /* Drop IP traffic destined to router owned IPs. Part of it is dropped
- * in stage "lr_in_ip_input" but traffic that could have been unSNATed
- * but didn't match any existing session might still end up here.
- *
- * Priority 1.
- */
- build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
- lflows);
- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
- && strcmp(op->nbsp->type, "virtual")) {
- /* This is a logical switch port that backs a VM or a container.
- * Extract its addresses. For each of the address, go through all
- * the router ports attached to the switch (to which this port
- * connects) and if the address in question is reachable from the
- * router port, add an ARP/ND entry in that router's pipeline. */
+ /* ICMPv6 time exceeded */
+ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ /* skip link-local address */
+ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+ continue;
+ }
- for (size_t i = 0; i < op->n_lsp_addrs; i++) {
- const char *ea_s = op->lsp_addrs[i].ea_s;
- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
- const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
- for (size_t k = 0; k < op->od->n_router_ports; k++) {
- /* Get the Logical_Router_Port that the
- * Logical_Switch_Port is connected to, as
- * 'peer'. */
- const char *peer_name = smap_get(
- &op->od->router_ports[k]->nbsp->options,
- "router-port");
- if (!peer_name) {
- continue;
- }
+ ds_clear(match);
+ ds_clear(actions);
- struct ovn_port *peer = ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- continue;
- }
+ ds_put_format(match,
+ "inport == %s && ip6 && "
+ "ip6.src == %s/%d && "
+ "ip.ttl == {0, 1} && !ip.later_frag",
+ op->json_key,
+ op->lrp_networks.ipv6_addrs[i].network_s,
+ op->lrp_networks.ipv6_addrs[i].plen);
+ ds_put_format(actions,
+ "icmp6 {"
+ "eth.dst <-> eth.src; "
+ "ip6.dst = ip6.src; "
+ "ip6.src = %s; "
+ "ip.ttl = 255; "
+ "icmp6.type = 3; /* Time exceeded */ "
+ "icmp6.code = 0; /* TTL exceeded in transit */ "
+ "next; };",
+ op->lrp_networks.ipv6_addrs[i].addr_s);
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
+ }
+ }
- if (!find_lrp_member_ip(peer, ip_s)) {
- continue;
- }
+}
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV4 " == %s",
- peer->json_key, ip_s);
+static void
+build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od,
+ struct hmap *lflows)
+{
+ if (od->nbr) {
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
- ovn_lflow_add_with_hint(lflows, peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match),
- ds_cstr(actions),
- &op->nbsp->header_);
- }
+ /* Priority-90-92 flows handle ARP requests and ND packets. Most are
+ * per logical port but DNAT addresses can be handled per datapath
+ * for non gateway router ports.
+ *
+ * Priority 91 and 92 flows are added for each gateway router
+ * port to handle the special cases. In case we get the packet
+ * on a regular port, just reply with the port's ETH address.
+ */
+ for (int i = 0; i < od->nbr->n_nat; i++) {
+ struct ovn_nat *nat_entry = &od->nat_entries[i];
+
+ /* Skip entries we failed to parse. */
+ if (!nat_entry_is_valid(nat_entry)) {
+ continue;
}
- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
- const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
- for (size_t k = 0; k < op->od->n_router_ports; k++) {
- /* Get the Logical_Router_Port that the
- * Logical_Switch_Port is connected to, as
- * 'peer'. */
- const char *peer_name = smap_get(
- &op->od->router_ports[k]->nbsp->options,
- "router-port");
- if (!peer_name) {
- continue;
- }
+ /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+ * below.
+ */
+ if (!strcmp(nat_entry->nb->type, "snat")) {
+ continue;
+ }
+ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+ }
- struct ovn_port *peer = ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- continue;
- }
+ /* Now handle SNAT entries too, one per unique SNAT IP. */
+ struct shash_node *snat_snode;
+ SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
+ struct ovn_snat_ip *snat_ip = snat_snode->data;
- if (!find_lrp_member_ip(peer, ip_s)) {
- continue;
- }
+ if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+ continue;
+ }
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV6 " == %s",
- peer->json_key, ip_s);
+ struct ovn_nat *nat_entry =
+ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+ struct ovn_nat, ext_addr_list_node);
+ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+ }
+ }
+}
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
- ovn_lflow_add_with_hint(lflows, peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match),
- ds_cstr(actions),
- &op->nbsp->header_);
- }
- }
+/* Logical router ingress table 3: IP Input for IPv4. */
+static void
+build_lrouter_ipv4_ip_input(struct ovn_port *op,
+ struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+{
+ /* No ingress packets are accepted on a chassisredirect
+ * port, so no need to program flows for that port. */
+ if (op->nbrp && (!op->derived)) {
+ if (op->lrp_networks.n_ipv4_addrs) {
+ /* L3 admission control: drop packets that originate from an
+ * IPv4 address owned by the router or a broadcast address
+ * known to the router (priority 100). */
+ ds_clear(match);
+ ds_put_cstr(match, "ip4.src == ");
+ op_put_v4_networks(match, op, true);
+ ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+ ds_cstr(match), "drop;",
+ &op->nbrp->header_);
+
+ /* ICMP echo reply. These flows reply to ICMP echo requests
+ * received for the router's IP address. Since packets only
+ * get here as part of the logical router datapath, the inport
+ * (i.e. the incoming locally attached net) does not matter.
+ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
+ ds_clear(match);
+ ds_put_cstr(match, "ip4.dst == ");
+ op_put_v4_networks(match, op, false);
+ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
+
+ const char * icmp_actions = "ip4.dst <-> ip4.src; "
+ "ip.ttl = 255; "
+ "icmp4.type = 0; "
+ "flags.loopback = 1; "
+ "next; ";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+ ds_cstr(match), icmp_actions,
+ &op->nbrp->header_);
}
- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
- && !strcmp(op->nbsp->type, "virtual")) {
- /* This is a virtual port. Add ARP replies for the virtual ip with
- * the mac of the present active virtual parent.
- * If the logical port doesn't have virtual parent set in
- * Port_Binding table, then add the flow to set eth.dst to
- * 00:00:00:00:00:00 and advance to next table so that ARP is
- * resolved by router pipeline using the arp{} action.
- * The MAC_Binding entry for the virtual ip might be invalid. */
- ovs_be32 ip;
- const char *vip = smap_get(&op->nbsp->options,
- "virtual-ip");
- const char *virtual_parents = smap_get(&op->nbsp->options,
- "virtual-parents");
- if (!vip || !virtual_parents ||
- !ip_parse(vip, &ip) || !op->sb) {
- return;
+ /* ICMP time exceeded */
+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+ ds_clear(match);
+ ds_clear(actions);
+
+ ds_put_format(match,
+ "inport == %s && ip4 && "
+ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
+ ds_put_format(actions,
+ "icmp4 {"
+ "eth.dst <-> eth.src; "
+ "icmp4.type = 11; /* Time exceeded */ "
+ "icmp4.code = 0; /* TTL exceeded in transit */ "
+ "ip4.dst = ip4.src; "
+ "ip4.src = %s; "
+ "ip.ttl = 255; "
+ "next; };",
+ op->lrp_networks.ipv4_addrs[i].addr_s);
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+ ds_cstr(match), ds_cstr(actions),
+ &op->nbrp->header_);
}
- if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
- !op->sb->chassis) {
- /* The virtual port is not claimed yet. */
- for (size_t i = 0; i < op->od->n_router_ports; i++) {
- const char *peer_name = smap_get(
- &op->od->router_ports[i]->nbsp->options,
- "router-port");
- if (!peer_name) {
- continue;
- }
+ /* ARP reply. These flows reply to ARP requests for the router's own
+ * IP address. */
+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+ ds_clear(match);
+ ds_put_format(match, "arp.spa == %s/%u",
+ op->lrp_networks.ipv4_addrs[i].network_s,
+ op->lrp_networks.ipv4_addrs[i].plen);
- struct ovn_port *peer = ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- continue;
+ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
+ && op->peer->od->n_localnet_ports) {
+ bool add_chassis_resident_check = false;
+ if (op == op->od->l3dgw_port) {
+ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+ * should only be sent from the gateway chassis, so that
+ * upstream MAC learning points to the gateway chassis.
+ * Also need to avoid generation of multiple ARP responses
+ * from different chassis. */
+ add_chassis_resident_check = true;
+ } else {
+ /* Check if the option 'reside-on-redirect-chassis'
+ * is set to true on the router port. If set to true
+ * and if peer's logical switch has a localnet port, it
+ * means the router pipeline for the packets from
+ * peer's logical switch will be run on the chassis
+ * hosting the gateway port and it should reply to the
+ * ARP requests for the router port IPs.
+ */
+ add_chassis_resident_check = smap_get_bool(
+ &op->nbrp->options,
+ "reside-on-redirect-chassis", false);
}
- if (find_lrp_member_ip(peer, vip)) {
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV4 " == %s",
- peer->json_key, vip);
-
- const char *arp_actions =
- "eth.dst = 00:00:00:00:00:00; next;";
- ovn_lflow_add_with_hint(lflows, peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match),
- arp_actions,
- &op->nbsp->header_);
- break;
+ if (add_chassis_resident_check) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
}
- } else {
- struct ovn_port *vp =
- ovn_port_find(ports, op->sb->virtual_parent);
- if (!vp || !vp->nbsp) {
- return;
- }
- for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
- bool found_vip_network = false;
- const char *ea_s = vp->lsp_addrs[i].ea_s;
- for (size_t j = 0; j < vp->od->n_router_ports; j++) {
- /* Get the Logical_Router_Port that the
- * Logical_Switch_Port is connected to, as
- * 'peer'. */
- const char *peer_name = smap_get(
- &vp->od->router_ports[j]->nbsp->options,
- "router-port");
- if (!peer_name) {
- continue;
- }
+ build_lrouter_arp_flow(op->od, op,
+ op->lrp_networks.ipv4_addrs[i].addr_s,
+ REG_INPORT_ETH_ADDR, match, false, 90,
+ &op->nbrp->header_, lflows);
+ }
- struct ovn_port *peer =
- ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- continue;
- }
+ /* A set to hold all load-balancer vips that need ARP responses. */
+ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
+ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
- if (!find_lrp_member_ip(peer, vip)) {
- continue;
- }
-
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV4 " == %s",
- peer->json_key, vip);
+ const char *ip_address;
+ SSET_FOR_EACH (ip_address, &all_ips_v4) {
+ ds_clear(match);
+ if (op == op->od->l3dgw_port) {
+ ds_put_format(match, "is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
+ }
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
- ovn_lflow_add_with_hint(lflows, peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match),
- ds_cstr(actions),
- &op->nbsp->header_);
- found_vip_network = true;
- break;
- }
+ build_lrouter_arp_flow(op->od, op,
+ ip_address, REG_INPORT_ETH_ADDR,
+ match, false, 90, NULL, lflows);
+ }
- if (found_vip_network) {
- break;
- }
+ SSET_FOR_EACH (ip_address, &all_ips_v6) {
+ ds_clear(match);
+ if (op == op->od->l3dgw_port) {
+ ds_put_format(match, "is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
}
+
+ build_lrouter_nd_flow(op->od, op, "nd_na",
+ ip_address, NULL, REG_INPORT_ETH_ADDR,
+ match, false, 90, NULL, lflows);
}
- } else if (lsp_is_router(op->nbsp)) {
- /* This is a logical switch port that connects to a router. */
- /* The peer of this switch port is the router port for which
- * we need to add logical flows such that it can resolve
- * ARP entries for all the other router ports connected to
- * the switch in question. */
+ sset_destroy(&all_ips_v4);
+ sset_destroy(&all_ips_v6);
- const char *peer_name = smap_get(&op->nbsp->options,
- "router-port");
- if (!peer_name) {
- return;
- }
+ if (!smap_get(&op->od->nbr->options, "chassis")
+ && !op->od->l3dgw_port) {
+ /* UDP/TCP port unreachable. */
+ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+ ds_clear(match);
+ ds_put_format(match,
+ "ip4 && ip4.dst == %s && !ip.later_frag && udp",
+ op->lrp_networks.ipv4_addrs[i].addr_s);
+ const char *action = "icmp4 {"
+ "eth.dst <-> eth.src; "
+ "ip4.dst <-> ip4.src; "
+ "ip.ttl = 255; "
+ "icmp4.type = 3; "
+ "icmp4.code = 3; "
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 80, ds_cstr(match), action,
+ &op->nbrp->header_);
- struct ovn_port *peer = ovn_port_find(ports, peer_name);
- if (!peer || !peer->nbrp) {
- return;
+ ds_clear(match);
+ ds_put_format(match,
+ "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
+ op->lrp_networks.ipv4_addrs[i].addr_s);
+ action = "tcp_reset {"
+ "eth.dst <-> eth.src; "
+ "ip4.dst <-> ip4.src; "
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 80, ds_cstr(match), action,
+ &op->nbrp->header_);
+
+ ds_clear(match);
+ ds_put_format(match,
+ "ip4 && ip4.dst == %s && !ip.later_frag",
+ op->lrp_networks.ipv4_addrs[i].addr_s);
+ action = "icmp4 {"
+ "eth.dst <-> eth.src; "
+ "ip4.dst <-> ip4.src; "
+ "ip.ttl = 255; "
+ "icmp4.type = 3; "
+ "icmp4.code = 2; "
+ "next; };";
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+ 70, ds_cstr(match), action,
+ &op->nbrp->header_);
+ }
}
- if (peer->od->nbr &&
- smap_get_bool(&peer->od->nbr->options,
- "dynamic_neigh_routers", false)) {
+ /* Drop IP traffic destined to router owned IPs except if the IP is
+ * also a SNAT IP. Those are dropped later, in stage
+ * "lr_in_arp_resolve", if unSNAT was unsuccessful.
+ *
+ * Priority 60.
+ */
+ build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
+ lflows);
+
+ /* ARP / ND handling for external IP addresses.
+ *
+ * DNAT and SNAT IP addresses are external IP addresses that need ARP
+ * handling.
+ *
+ * These are already taken care of globally, per router. The only
+ * exception is on the l3dgw_port where we might need to use a
+ * different ETH address.
+ */
+ if (op != op->od->l3dgw_port) {
return;
}
- for (size_t i = 0; i < op->od->n_router_ports; i++) {
- const char *router_port_name = smap_get(
- &op->od->router_ports[i]->nbsp->options,
- "router-port");
- struct ovn_port *router_port = ovn_port_find(ports,
- router_port_name);
- if (!router_port || !router_port->nbrp) {
+ for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+ struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+
+ /* Skip entries we failed to parse. */
+ if (!nat_entry_is_valid(nat_entry)) {
continue;
}
- /* Skip the router port under consideration. */
- if (router_port == peer) {
- continue;
+ /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+ * below.
+ */
+ if (!strcmp(nat_entry->nb->type, "snat")) {
+ continue;
}
+ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+ }
- if (router_port->lrp_networks.n_ipv4_addrs) {
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV4 " == ",
- peer->json_key);
- op_put_v4_networks(match, router_port, false);
+ /* Now handle SNAT entries too, one per unique SNAT IP. */
+ struct shash_node *snat_snode;
+ SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
+ struct ovn_snat_ip *snat_ip = snat_snode->data;
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;",
- router_port->lrp_networks.ea_s);
- ovn_lflow_add_with_hint(lflows, peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match), ds_cstr(actions),
- &op->nbsp->header_);
+ if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+ continue;
}
- if (router_port->lrp_networks.n_ipv6_addrs) {
- ds_clear(match);
- ds_put_format(match, "outport == %s && "
- REG_NEXT_HOP_IPV6 " == ",
- peer->json_key);
- op_put_v6_networks(match, router_port);
-
- ds_clear(actions);
- ds_put_format(actions, "eth.dst = %s; next;",
- router_port->lrp_networks.ea_s);
- ovn_lflow_add_with_hint(lflows, peer->od,
- S_ROUTER_IN_ARP_RESOLVE, 100,
- ds_cstr(match), ds_cstr(actions),
- &op->nbsp->header_);
- }
+ struct ovn_nat *nat_entry =
+ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+ struct ovn_nat, ext_addr_list_node);
+ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
}
}
-
}
-/* Local router ingress table CHK_PKT_LEN: Check packet length.
- *
- * Any IPv4 packet with outport set to the distributed gateway
- * router port, check the packet length and store the result in the
- * 'REGBIT_PKT_LARGER' register bit.
- *
- * Local router ingress table LARGER_PKTS: Handle larger packets.
- *
- * Any IPv4 packet with outport set to the distributed gateway
- * router port and the 'REGBIT_PKT_LARGER' register bit is set,
- * generate ICMPv4 packet with type 3 (Destination Unreachable) and
- * code 4 (Fragmentation needed).
- * */
+/* NAT, Defrag and load balancing. */
static void
-build_check_pkt_len_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows,
- struct hmap *ports,
- struct ds *match, struct ds *actions)
+build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od,
+ struct hmap *lflows,
+ struct shash *meter_groups,
+ struct hmap *lbs,
+ struct ds *match, struct ds *actions)
{
if (od->nbr) {
/* Packets are allowed by default. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
- "next;");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
- "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
- if (od->l3dgw_port && od->l3redirect_port) {
- int gw_mtu = 0;
- if (od->l3dgw_port->nbrp) {
- gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
- "gateway_mtu", 0);
- }
- /* Add the flows only if gateway_mtu is configured. */
- if (gw_mtu <= 0) {
- return;
- }
+ /* Send the IPv6 NS packets to next table. When ovn-controller
+ * generates IPv6 NS (for the action - nd_ns{}), the injected
+ * packet would go through conntrack - which is not required. */
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
- ds_clear(match);
- ds_put_format(match, "outport == %s", od->l3dgw_port->json_key);
+ /* NAT rules are only valid on Gateway routers and routers with
+ * l3dgw_port (router has a port with gateway chassis
+ * specified). */
+ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+ return;
+ }
- ds_clear(actions);
- ds_put_format(actions,
- REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
- " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
- ds_cstr(match), ds_cstr(actions),
- &od->l3dgw_port->nbrp->header_);
+ struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
- for (size_t i = 0; i < od->nbr->n_ports; i++) {
- struct ovn_port *rp = ovn_port_find(ports,
- od->nbr->ports[i]->name);
- if (!rp || rp == od->l3dgw_port) {
- continue;
- }
+ bool dnat_force_snat_ip =
+ !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
+ bool lb_force_snat_ip =
+ !lport_addresses_is_empty(&od->lb_force_snat_addrs);
- if (rp->lrp_networks.ipv4_addrs) {
- ds_clear(match);
- ds_put_format(match, "inport == %s && outport == %s"
- " && ip4 && "REGBIT_PKT_LARGER,
- rp->json_key, od->l3dgw_port->json_key);
+ for (int i = 0; i < od->nbr->n_nat; i++) {
+ const struct nbrec_nat *nat;
- ds_clear(actions);
- /* Set icmp4.frag_mtu to gw_mtu */
- ds_put_format(actions,
- "icmp4_error {"
- REGBIT_EGRESS_LOOPBACK" = 1; "
- "eth.dst = %s; "
- "ip4.dst = ip4.src; "
- "ip4.src = %s; "
- "ip.ttl = 255; "
- "icmp4.type = 3; /* Destination Unreachable. */ "
- "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
- "icmp4.frag_mtu = %d; "
- "next(pipeline=ingress, table=%d); };",
- rp->lrp_networks.ea_s,
- rp->lrp_networks.ipv4_addrs[0].addr_s,
- gw_mtu,
- ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
- ovn_lflow_add_with_hint(lflows, od,
- S_ROUTER_IN_LARGER_PKTS, 50,
- ds_cstr(match), ds_cstr(actions),
- &rp->nbrp->header_);
- }
+ nat = od->nbr->nat[i];
- if (rp->lrp_networks.ipv6_addrs) {
- ds_clear(match);
- ds_put_format(match, "inport == %s && outport == %s"
- " && ip6 && "REGBIT_PKT_LARGER,
- rp->json_key, od->l3dgw_port->json_key);
+ ovs_be32 ip, mask;
+ struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
+ bool is_v6 = false;
+ bool stateless = lrouter_nat_is_stateless(nat);
+ struct nbrec_address_set *allowed_ext_ips =
+ nat->allowed_ext_ips;
+ struct nbrec_address_set *exempted_ext_ips =
+ nat->exempted_ext_ips;
- ds_clear(actions);
- /* Set icmp6.frag_mtu to gw_mtu */
- ds_put_format(actions,
- "icmp6_error {"
- REGBIT_EGRESS_LOOPBACK" = 1; "
- "eth.dst = %s; "
- "ip6.dst = ip6.src; "
- "ip6.src = %s; "
- "ip.ttl = 255; "
- "icmp6.type = 2; /* Packet Too Big. */ "
- "icmp6.code = 0; "
- "icmp6.frag_mtu = %d; "
- "next(pipeline=ingress, table=%d); };",
- rp->lrp_networks.ea_s,
- rp->lrp_networks.ipv6_addrs[0].addr_s,
- gw_mtu,
- ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
- ovn_lflow_add_with_hint(lflows, od,
- S_ROUTER_IN_LARGER_PKTS, 50,
- ds_cstr(match), ds_cstr(actions),
- &rp->nbrp->header_);
- }
+ if (allowed_ext_ips && exempted_ext_ips) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
+ "both allowed and exempt external ips set",
+ UUID_ARGS(&(nat->header_.uuid)));
+ continue;
}
- }
- }
-}
-/* Logical router ingress table GW_REDIRECT: Gateway redirect.
- *
- * For traffic with outport equal to the l3dgw_port
- * on a distributed router, this table redirects a subset
- * of the traffic to the l3redirect_port which represents
- * the central instance of the l3dgw_port.
- */
-static void
-build_gateway_redirect_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (od->nbr) {
- if (od->l3dgw_port && od->l3redirect_port) {
- const struct ovsdb_idl_row *stage_hint = NULL;
+ char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
+ if (error || mask != OVS_BE32_MAX) {
+ free(error);
+ error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
+ if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
+ /* Invalid for both IPv4 and IPv6 */
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad external ip %s for nat",
+ nat->external_ip);
+ free(error);
+ continue;
+ }
+ /* It was an invalid IPv4 address, but valid IPv6.
+ * Treat the rest of the handling of this NAT rule
+ * as IPv6. */
+ is_v6 = true;
+ }
- if (od->l3dgw_port->nbrp) {
- stage_hint = &od->l3dgw_port->nbrp->header_;
+ /* Check the validity of nat->logical_ip. 'logical_ip' can
+ * be a subnet when the type is "snat". */
+ int cidr_bits;
+ if (is_v6) {
+ error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
+ cidr_bits = ipv6_count_cidr_bits(&mask_v6);
+ } else {
+ error = ip_parse_masked(nat->logical_ip, &ip, &mask);
+ cidr_bits = ip_count_cidr_bits(mask);
+ }
+ if (!strcmp(nat->type, "snat")) {
+ if (error) {
+ /* Invalid for both IPv4 and IPv6 */
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
+ "in router "UUID_FMT"",
+ nat->logical_ip, UUID_ARGS(&od->key));
+ free(error);
+ continue;
+ }
+ } else {
+ if (error || (!is_v6 && mask != OVS_BE32_MAX)
+ || (is_v6 && memcmp(&mask_v6, &v6_exact,
+ sizeof mask_v6))) {
+ /* Invalid for both IPv4 and IPv6 */
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
+ ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
+ free(error);
+ continue;
+ }
}
- /* For traffic with outport == l3dgw_port, if the
- * packet did not match any higher priority redirect
- * rule, then the traffic is redirected to the central
- * instance of the l3dgw_port. */
- ds_clear(match);
- ds_put_format(match, "outport == %s",
- od->l3dgw_port->json_key);
- ds_clear(actions);
- ds_put_format(actions, "outport = %s; next;",
- od->l3redirect_port->json_key);
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
- ds_cstr(match), ds_cstr(actions),
- stage_hint);
- }
+ /* For distributed router NAT, determine whether this NAT rule
+ * satisfies the conditions for distributed NAT processing. */
+ bool distributed = false;
+ struct eth_addr mac;
+ if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
+ nat->logical_port && nat->external_mac) {
+ if (eth_addr_from_string(nat->external_mac, &mac)) {
+ distributed = true;
+ } else {
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
+ ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
+ continue;
+ }
+ }
- /* Packets are allowed by default. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
- }
-}
+ /* Ingress UNSNAT table: It is for already established connections'
+ * reverse traffic. i.e., SNAT has already been done in egress
+ * pipeline and now the packet has entered the ingress pipeline as
+ * part of a reply. We undo the SNAT here.
+ *
+ * Undoing SNAT has to happen before DNAT processing. This is
+ * because when the packet was DNATed in ingress pipeline, it did
+ * not know about the possibility of eventual additional SNAT in
+ * egress pipeline. */
+ if (!strcmp(nat->type, "snat")
+ || !strcmp(nat->type, "dnat_and_snat")) {
+ if (!od->l3dgw_port) {
+ /* Gateway router. */
+ ds_clear(match);
+ ds_clear(actions);
+ ds_put_format(match, "ip && ip%s.dst == %s",
+ is_v6 ? "6" : "4",
+ nat->external_ip);
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.dst=%s; next;",
+ is_v6 ? "6" : "4", nat->logical_ip);
+ } else {
+ ds_put_cstr(actions, "ct_snat;");
+ }
-/* Local router ingress table ARP_REQUEST: ARP request.
- *
- * In the common case where the Ethernet destination has been resolved,
- * this table outputs the packet (priority 0). Otherwise, it composes
- * and sends an ARP/IPv6 NA request (priority 100). */
-static void
-build_arp_request_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (od->nbr) {
- for (int i = 0; i < od->nbr->n_static_routes; i++) {
- const struct nbrec_logical_router_static_route *route;
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+ 90, ds_cstr(match),
+ ds_cstr(actions),
+ &nat->header_);
+ } else {
+ /* Distributed router. */
- route = od->nbr->static_routes[i];
- struct in6_addr gw_ip6;
- unsigned int plen;
- char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
- if (error || plen != 128) {
- free(error);
- continue;
- }
+ /* Traffic received on l3dgw_port is subject to NAT. */
+ ds_clear(match);
+ ds_clear(actions);
+ ds_put_format(match, "ip && ip%s.dst == %s"
+ " && inport == %s",
+ is_v6 ? "6" : "4",
+ nat->external_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the gateway chassis. */
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
- ds_clear(match);
- ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
- "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
- route->nexthop);
- struct in6_addr sn_addr;
- struct eth_addr eth_dst;
- in6_addr_solicited_node(&sn_addr, &gw_ip6);
- ipv6_multicast_to_ethernet(&eth_dst, &sn_addr);
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.dst=%s; next;",
+ is_v6 ? "6" : "4", nat->logical_ip);
+ } else {
+ ds_put_cstr(actions, "ct_snat;");
+ }
- char sn_addr_s[INET6_ADDRSTRLEN + 1];
- ipv6_string_mapped(sn_addr_s, &sn_addr);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+ 100,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
+ }
+ }
- ds_clear(actions);
- ds_put_format(actions,
- "nd_ns { "
- "eth.dst = "ETH_ADDR_FMT"; "
- "ip6.dst = %s; "
- "nd.target = %s; "
- "output; "
- "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
- route->nexthop);
-
- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
- ds_cstr(match), ds_cstr(actions),
- &route->header_);
- }
-
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
- "eth.dst == 00:00:00:00:00:00 && ip4",
- "arp { "
- "eth.dst = ff:ff:ff:ff:ff:ff; "
- "arp.spa = " REG_SRC_IPV4 "; "
- "arp.tpa = " REG_NEXT_HOP_IPV4 "; "
- "arp.op = 1; " /* ARP request */
- "output; "
- "};");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
- "eth.dst == 00:00:00:00:00:00 && ip6",
- "nd_ns { "
- "nd.target = " REG_NEXT_HOP_IPV6 "; "
- "output; "
- "};");
- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
- }
-}
-
-/* Logical router egress table DELIVERY: Delivery (priority 100-110).
- *
- * Priority 100 rules deliver packets to enabled logical ports.
- * Priority 110 rules match multicast packets and update the source
- * mac before delivering to enabled logical ports. IP multicast traffic
- * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
- */
-static void
-build_egress_delivery_flows_for_lrouter_port(
- struct ovn_port *op, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (op->nbrp) {
- if (!lrport_is_enabled(op->nbrp)) {
- /* Drop packets to disabled logical ports (since logical flow
- * tables are default-drop). */
- return;
- }
-
- if (op->derived) {
- /* No egress packets should be processed in the context of
- * a chassisredirect port. The chassisredirect port should
- * be replaced by the l3dgw port in the local output
- * pipeline stage before egress processing. */
- return;
- }
-
- /* If multicast relay is enabled then also adjust source mac for IP
- * multicast traffic.
- */
- if (op->od->mcast_info.rtr.relay) {
- ds_clear(match);
- ds_clear(actions);
- ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s",
- op->json_key);
- ds_put_format(actions, "eth.src = %s; output;",
- op->lrp_networks.ea_s);
- ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
- ds_cstr(match), ds_cstr(actions));
- }
-
- ds_clear(match);
- ds_put_format(match, "outport == %s", op->json_key);
- ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
- ds_cstr(match), "output;");
- }
-
-}
-
-static void
-build_misc_local_traffic_drop_flows_for_lrouter(
- struct ovn_datapath *od, struct hmap *lflows)
-{
- if (od->nbr) {
- /* L3 admission control: drop multicast and broadcast source, localhost
- * source or destination, and zero network source or destination
- * (priority 100). */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
- "ip4.src_mcast ||"
- "ip4.src == 255.255.255.255 || "
- "ip4.src == 127.0.0.0/8 || "
- "ip4.dst == 127.0.0.0/8 || "
- "ip4.src == 0.0.0.0/8 || "
- "ip4.dst == 0.0.0.0/8",
- "drop;");
-
- /* Drop ARP packets (priority 85). ARP request packets for router's own
- * IPs are handled with priority-90 flows.
- * Drop IPv6 ND packets (priority 85). ND NA packets for router's own
- * IPs are handled with priority-90 flows.
- */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
- "arp || nd", "drop;");
-
- /* Allow IPv6 multicast traffic that's supposed to reach the
- * router pipeline (e.g., router solicitations).
- */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra",
- "next;");
-
- /* Drop other reserved multicast. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83,
- "ip6.mcast_rsvd", "drop;");
-
- /* Allow other multicast if relay enabled (priority 82). */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
- "ip4.mcast || ip6.mcast",
- od->mcast_info.rtr.relay ? "next;" : "drop;");
-
- /* Drop Ethernet local broadcast. By definition this traffic should
- * not be forwarded.*/
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
- "eth.bcast", "drop;");
+ /* Ingress DNAT table: Packets enter the pipeline with destination
+ * IP address that needs to be DNATted from an external IP address
+ * to a logical IP address. */
+ if (!strcmp(nat->type, "dnat")
+ || !strcmp(nat->type, "dnat_and_snat")) {
+ if (!od->l3dgw_port) {
+ /* Gateway router. */
+ /* Packet when it goes from the initiator to destination.
+ * We need to set flags.loopback because the router can
+ * send the packet back through the same interface. */
+ ds_clear(match);
+ ds_put_format(match, "ip && ip%s.dst == %s",
+ is_v6 ? "6" : "4",
+ nat->external_ip);
+ ds_clear(actions);
+ if (allowed_ext_ips || exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+ is_v6, true, mask);
+ }
- /* TTL discard */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
- "ip4 && ip.ttl == {0, 1}", "drop;");
+ if (dnat_force_snat_ip) {
+ /* Indicate to the future tables that a DNAT has taken
+ * place and a force SNAT needs to be done in the
+ * Egress SNAT table. */
+ ds_put_format(actions,
+ "flags.force_snat_for_dnat = 1; ");
+ }
- /* Pass other traffic not already handled to the next table for
- * routing. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
- }
-}
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "flags.loopback = 1; "
+ "ip%s.dst=%s; next;",
+ is_v6 ? "6" : "4", nat->logical_ip);
+ } else {
+ ds_put_format(actions, "flags.loopback = 1; "
+ "ct_dnat(%s", nat->logical_ip);
-static void
-build_dhcpv6_reply_flows_for_lrouter_port(
- struct ovn_port *op, struct hmap *lflows,
- struct ds *match)
-{
- if (op->nbrp && (!op->derived)) {
- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
- ds_clear(match);
- ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&"
- " udp.dst == 546",
- op->lrp_networks.ipv6_addrs[i].addr_s);
- ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
- ds_cstr(match),
- "reg0 = 0; handle_dhcpv6_reply;");
- }
- }
+ if (nat->external_port_range[0]) {
+ ds_put_format(actions, ",%s",
+ nat->external_port_range);
+ }
+ ds_put_format(actions, ");");
+ }
-}
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
+ } else {
+ /* Distributed router. */
-static void
-build_ipv6_input_flows_for_lrouter_port(
- struct ovn_port *op, struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- if (op->nbrp && (!op->derived)) {
- /* No ingress packets are accepted on a chassisredirect
- * port, so no need to program flows for that port. */
- if (op->lrp_networks.n_ipv6_addrs) {
- /* ICMPv6 echo reply. These flows reply to echo requests
- * received for the router's IP address. */
- ds_clear(match);
- ds_put_cstr(match, "ip6.dst == ");
- op_put_v6_networks(match, op);
- ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
+ /* Traffic received on l3dgw_port is subject to NAT. */
+ ds_clear(match);
+ ds_put_format(match, "ip && ip%s.dst == %s"
+ " && inport == %s",
+ is_v6 ? "6" : "4",
+ nat->external_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the gateway chassis. */
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ ds_clear(actions);
+ if (allowed_ext_ips || exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+ is_v6, true, mask);
+ }
- const char *lrp_actions =
- "ip6.dst <-> ip6.src; "
- "ip.ttl = 255; "
- "icmp6.type = 129; "
- "flags.loopback = 1; "
- "next; ";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
- ds_cstr(match), lrp_actions,
- &op->nbrp->header_);
- }
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.dst=%s; next;",
+ is_v6 ? "6" : "4", nat->logical_ip);
+ } else {
+ ds_put_format(actions, "ct_dnat(%s", nat->logical_ip);
+ if (nat->external_port_range[0]) {
+ ds_put_format(actions, ",%s",
+ nat->external_port_range);
+ }
+ ds_put_format(actions, ");");
+ }
- /* ND reply. These flows reply to ND solicitations for the
- * router's own IP address. */
- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
- ds_clear(match);
- if (op->od->l3dgw_port && op == op->od->l3dgw_port
- && op->od->l3redirect_port) {
- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
- * should only be sent from the gateway chassi, so that
- * upstream MAC learning points to the gateway chassis.
- * Also need to avoid generation of multiple ND replies
- * from different chassis. */
- ds_put_format(match, "is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
+ }
}
- build_lrouter_nd_flow(op->od, op, "nd_na_router",
- op->lrp_networks.ipv6_addrs[i].addr_s,
- op->lrp_networks.ipv6_addrs[i].sn_addr_s,
- REG_INPORT_ETH_ADDR, match, false, 90,
- &op->nbrp->header_, lflows);
- }
-
- /* UDP/TCP port unreachable */
- if (!smap_get(&op->od->nbr->options, "chassis")
- && !op->od->l3dgw_port) {
- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
- ds_clear(match);
- ds_put_format(match,
- "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
- op->lrp_networks.ipv6_addrs[i].addr_s);
- const char *action = "tcp_reset {"
- "eth.dst <-> eth.src; "
- "ip6.dst <-> ip6.src; "
- "next; };";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
- 80, ds_cstr(match), action,
- &op->nbrp->header_);
-
- ds_clear(match);
- ds_put_format(match,
- "ip6 && ip6.dst == %s && !ip.later_frag && udp",
- op->lrp_networks.ipv6_addrs[i].addr_s);
- action = "icmp6 {"
- "eth.dst <-> eth.src; "
- "ip6.dst <-> ip6.src; "
- "ip.ttl = 255; "
- "icmp6.type = 1; "
- "icmp6.code = 4; "
- "next; };";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
- 80, ds_cstr(match), action,
- &op->nbrp->header_);
-
- ds_clear(match);
- ds_put_format(match,
- "ip6 && ip6.dst == %s && !ip.later_frag",
- op->lrp_networks.ipv6_addrs[i].addr_s);
- action = "icmp6 {"
- "eth.dst <-> eth.src; "
- "ip6.dst <-> ip6.src; "
- "ip.ttl = 255; "
- "icmp6.type = 1; "
- "icmp6.code = 3; "
- "next; };";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
- 70, ds_cstr(match), action,
- &op->nbrp->header_);
- }
- }
+ /* ARP resolve for NAT IPs. */
+ if (od->l3dgw_port) {
+ if (!strcmp(nat->type, "snat")) {
+ ds_clear(match);
+ ds_put_format(
+ match, "inport == %s && %s == %s",
+ od->l3dgw_port->json_key,
+ is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
+ 120, ds_cstr(match), "next;",
+ &nat->header_);
+ }
- /* ICMPv6 time exceeded */
- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
- /* skip link-local address */
- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
- continue;
+ if (!sset_contains(&nat_entries, nat->external_ip)) {
+ ds_clear(match);
+ ds_put_format(
+ match, "outport == %s && %s == %s",
+ od->l3dgw_port->json_key,
+ is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
+ nat->external_ip);
+ ds_clear(actions);
+ ds_put_format(
+ actions, "eth.dst = %s; next;",
+ distributed ? nat->external_mac :
+ od->l3dgw_port->lrp_networks.ea_s);
+ ovn_lflow_add_with_hint(lflows, od,
+ S_ROUTER_IN_ARP_RESOLVE,
+ 100, ds_cstr(match),
+ ds_cstr(actions),
+ &nat->header_);
+ sset_add(&nat_entries, nat->external_ip);
+ }
+ } else {
+ /* Add the NAT external_ip to the nat_entries even for
+ * gateway routers. This is required for adding load balancer
+ * flows.*/
+ sset_add(&nat_entries, nat->external_ip);
}
- ds_clear(match);
- ds_clear(actions);
-
- ds_put_format(match,
- "inport == %s && ip6 && "
- "ip6.src == %s/%d && "
- "ip.ttl == {0, 1} && !ip.later_frag",
- op->json_key,
- op->lrp_networks.ipv6_addrs[i].network_s,
- op->lrp_networks.ipv6_addrs[i].plen);
- ds_put_format(actions,
- "icmp6 {"
- "eth.dst <-> eth.src; "
- "ip6.dst = ip6.src; "
- "ip6.src = %s; "
- "ip.ttl = 255; "
- "icmp6.type = 3; /* Time exceeded */ "
- "icmp6.code = 0; /* TTL exceeded in transit */ "
- "next; };",
- op->lrp_networks.ipv6_addrs[i].addr_s);
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
- }
- }
+ /* Egress UNDNAT table: It is for already established connections'
+ * reverse traffic. i.e., DNAT has already been done in ingress
+ * pipeline and now the packet has entered the egress pipeline as
+ * part of a reply. We undo the DNAT here.
+ *
+ * Note that this only applies for NAT on a distributed router.
+ * Undo DNAT on a gateway router is done in the ingress DNAT
+ * pipeline stage. */
+ if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
+ || !strcmp(nat->type, "dnat_and_snat"))) {
+ ds_clear(match);
+ ds_put_format(match, "ip && ip%s.src == %s"
+ " && outport == %s",
+ is_v6 ? "6" : "4",
+ nat->logical_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the gateway chassis. */
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ ds_clear(actions);
+ if (distributed) {
+ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
+ ETH_ADDR_ARGS(mac));
+ }
-}
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.src=%s; next;",
+ is_v6 ? "6" : "4", nat->external_ip);
+ } else {
+ ds_put_format(actions, "ct_dnat;");
+ }
-static void
-build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od,
- struct hmap *lflows)
-{
- if (od->nbr) {
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
+ }
- /* Priority-90-92 flows handle ARP requests and ND packets. Most are
- * per logical port but DNAT addresses can be handled per datapath
- * for non gateway router ports.
- *
- * Priority 91 and 92 flows are added for each gateway router
- * port to handle the special cases. In case we get the packet
- * on a regular port, just reply with the port's ETH address.
- */
- for (int i = 0; i < od->nbr->n_nat; i++) {
- struct ovn_nat *nat_entry = &od->nat_entries[i];
+ /* Egress SNAT table: Packets enter the egress pipeline with
+ * source ip address that needs to be SNATted to an external ip
+ * address. */
+ if (!strcmp(nat->type, "snat")
+ || !strcmp(nat->type, "dnat_and_snat")) {
+ if (!od->l3dgw_port) {
+ /* Gateway router. */
+ ds_clear(match);
+ ds_put_format(match, "ip && ip%s.src == %s",
+ is_v6 ? "6" : "4",
+ nat->logical_ip);
+ ds_clear(actions);
- /* Skip entries we failed to parse. */
- if (!nat_entry_is_valid(nat_entry)) {
- continue;
- }
+ if (allowed_ext_ips || exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+ is_v6, false, mask);
+ }
- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
- * below.
- */
- if (!strcmp(nat_entry->nb->type, "snat")) {
- continue;
- }
- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
- }
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.src=%s; next;",
+ is_v6 ? "6" : "4", nat->external_ip);
+ } else {
+ ds_put_format(actions, "ct_snat(%s",
+ nat->external_ip);
- /* Now handle SNAT entries too, one per unique SNAT IP. */
- struct shash_node *snat_snode;
- SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
- struct ovn_snat_ip *snat_ip = snat_snode->data;
+ if (nat->external_port_range[0]) {
+ ds_put_format(actions, ",%s",
+ nat->external_port_range);
+ }
+ ds_put_format(actions, ");");
+ }
- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
- continue;
- }
+ /* The priority here is calculated such that the
+ * nat->logical_ip with the longest mask gets a higher
+ * priority. */
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+ cidr_bits + 1,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
+ } else {
+ uint16_t priority = cidr_bits + 1;
- struct ovn_nat *nat_entry =
- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
- struct ovn_nat, ext_addr_list_node);
- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
- }
- }
-}
+ /* Distributed router. */
+ ds_clear(match);
+ ds_put_format(match, "ip && ip%s.src == %s"
+ " && outport == %s",
+ is_v6 ? "6" : "4",
+ nat->logical_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the gateway chassis. */
+ priority += 128;
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ ds_clear(actions);
-/* Logical router ingress table 3: IP Input for IPv4. */
-static void
-build_lrouter_ipv4_ip_input(struct ovn_port *op,
- struct hmap *lflows,
- struct ds *match, struct ds *actions)
-{
- /* No ingress packets are accepted on a chassisredirect
- * port, so no need to program flows for that port. */
- if (op->nbrp && (!op->derived)) {
- if (op->lrp_networks.n_ipv4_addrs) {
- /* L3 admission control: drop packets that originate from an
- * IPv4 address owned by the router or a broadcast address
- * known to the router (priority 100). */
- ds_clear(match);
- ds_put_cstr(match, "ip4.src == ");
- op_put_v4_networks(match, op, true);
- ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
- ds_cstr(match), "drop;",
- &op->nbrp->header_);
+ if (allowed_ext_ips || exempted_ext_ips) {
+ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+ is_v6, false, mask);
+ }
- /* ICMP echo reply. These flows reply to ICMP echo requests
- * received for the router's IP address. Since packets only
- * get here as part of the logical router datapath, the inport
- * (i.e. the incoming locally attached net) does not matter.
- * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
- ds_clear(match);
- ds_put_cstr(match, "ip4.dst == ");
- op_put_v4_networks(match, op, false);
- ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
+ if (distributed) {
+ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
+ ETH_ADDR_ARGS(mac));
+ }
- const char * icmp_actions = "ip4.dst <-> ip4.src; "
- "ip.ttl = 255; "
- "icmp4.type = 0; "
- "flags.loopback = 1; "
- "next; ";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
- ds_cstr(match), icmp_actions,
- &op->nbrp->header_);
- }
+ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+ ds_put_format(actions, "ip%s.src=%s; next;",
+ is_v6 ? "6" : "4", nat->external_ip);
+ } else {
+ ds_put_format(actions, "ct_snat(%s",
+ nat->external_ip);
+ if (nat->external_port_range[0]) {
+ ds_put_format(actions, ",%s",
+ nat->external_port_range);
+ }
+ ds_put_format(actions, ");");
+ }
- /* ICMP time exceeded */
- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
- ds_clear(match);
- ds_clear(actions);
+ /* The priority here is calculated such that the
+ * nat->logical_ip with the longest mask gets a higher
+ * priority. */
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+ priority, ds_cstr(match),
+ ds_cstr(actions),
+ &nat->header_);
+ }
+ }
- ds_put_format(match,
- "inport == %s && ip4 && "
- "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
- ds_put_format(actions,
- "icmp4 {"
- "eth.dst <-> eth.src; "
- "icmp4.type = 11; /* Time exceeded */ "
- "icmp4.code = 0; /* TTL exceeded in transit */ "
- "ip4.dst = ip4.src; "
- "ip4.src = %s; "
- "ip.ttl = 255; "
- "next; };",
- op->lrp_networks.ipv4_addrs[i].addr_s);
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
- ds_cstr(match), ds_cstr(actions),
- &op->nbrp->header_);
- }
+ /* Logical router ingress table 0:
+ * For NAT on a distributed router, add rules allowing
+ * ingress traffic with eth.dst matching nat->external_mac
+ * on the l3dgw_port instance where nat->logical_port is
+ * resident. */
+ if (distributed) {
+ /* Store the ethernet address of the port receiving the packet.
+ * This will save us from having to match on inport further
+ * down in the pipeline.
+ */
+ ds_clear(actions);
+ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
+ od->l3dgw_port->lrp_networks.ea_s);
- /* ARP reply. These flows reply to ARP requests for the router's own
- * IP address. */
- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
- ds_clear(match);
- ds_put_format(match, "arp.spa == %s/%u",
- op->lrp_networks.ipv4_addrs[i].network_s,
- op->lrp_networks.ipv4_addrs[i].plen);
+ ds_clear(match);
+ ds_put_format(match,
+ "eth.dst == "ETH_ADDR_FMT" && inport == %s"
+ " && is_chassis_resident(\"%s\")",
+ ETH_ADDR_ARGS(mac),
+ od->l3dgw_port->json_key,
+ nat->logical_port);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
+ }
- if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
- && op->peer->od->n_localnet_ports) {
- bool add_chassis_resident_check = false;
- if (op == op->od->l3dgw_port) {
- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
- * should only be sent from the gateway chassis, so that
- * upstream MAC learning points to the gateway chassis.
- * Also need to avoid generation of multiple ARP responses
- * from different chassis. */
- add_chassis_resident_check = true;
+ /* Ingress Gateway Redirect Table: For NAT on a distributed
+ * router, add flows that are specific to a NAT rule. These
+ * flows indicate the presence of an applicable NAT rule that
+ * can be applied in a distributed manner.
+ * In particular REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to
+ * NAT external IP and NAT external mac so the ARP request
+ * generated in the following stage is sent out with proper IP/MAC
+ * src addresses.
+ */
+ if (distributed) {
+ ds_clear(match);
+ ds_clear(actions);
+ ds_put_format(match,
+ "ip%s.src == %s && outport == %s && "
+ "is_chassis_resident(\"%s\")",
+ is_v6 ? "6" : "4", nat->logical_ip,
+ od->l3dgw_port->json_key, nat->logical_port);
+ ds_put_format(actions, "eth.src = %s; %s = %s; next;",
+ nat->external_mac,
+ is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
+ nat->external_ip);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
+ 100, ds_cstr(match),
+ ds_cstr(actions), &nat->header_);
+ }
+
+ /* Egress Loopback table: For NAT on a distributed router.
+ * If packets in the egress pipeline on the distributed
+ * gateway port have ip.dst matching a NAT external IP, then
+ * loop a clone of the packet back to the beginning of the
+ * ingress pipeline with inport = outport. */
+ if (od->l3dgw_port) {
+ /* Distributed router. */
+ ds_clear(match);
+ ds_put_format(match, "ip%s.dst == %s && outport == %s",
+ is_v6 ? "6" : "4",
+ nat->external_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
} else {
- /* Check if the option 'reside-on-redirect-chassis'
- * is set to true on the router port. If set to true
- * and if peer's logical switch has a localnet port, it
- * means the router pipeline for the packets from
- * peer's logical switch is be run on the chassis
- * hosting the gateway port and it should reply to the
- * ARP requests for the router port IPs.
- */
- add_chassis_resident_check = smap_get_bool(
- &op->nbrp->options,
- "reside-on-redirect-chassis", false);
+ ds_put_format(match, " && is_chassis_resident(\"%s\")",
+ nat->logical_port);
}
- if (add_chassis_resident_check) {
- ds_put_format(match, " && is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
+ ds_clear(actions);
+ ds_put_format(actions,
+ "clone { ct_clear; "
+ "inport = outport; outport = \"\"; "
+ "flags = 0; flags.loopback = 1; ");
+ for (int j = 0; j < MFF_N_LOG_REGS; j++) {
+ ds_put_format(actions, "reg%d = 0; ", j);
}
+ ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; "
+ "next(pipeline=ingress, table=%d); };",
+ ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
+ ds_cstr(match), ds_cstr(actions),
+ &nat->header_);
}
-
- build_lrouter_arp_flow(op->od, op,
- op->lrp_networks.ipv4_addrs[i].addr_s,
- REG_INPORT_ETH_ADDR, match, false, 90,
- &op->nbrp->header_, lflows);
}
- /* A set to hold all load-balancer vips that need ARP responses. */
- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
-
- const char *ip_address;
- SSET_FOR_EACH (ip_address, &all_ips_v4) {
- ds_clear(match);
- if (op == op->od->l3dgw_port) {
- ds_put_format(match, "is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
+ /* Handle force SNAT options set in the gateway router. */
+ if (!od->l3dgw_port) {
+ if (dnat_force_snat_ip) {
+ if (od->dnat_force_snat_addrs.n_ipv4_addrs) {
+ build_lrouter_force_snat_flows(lflows, od, "4",
+ od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s,
+ "dnat");
+ }
+ if (od->dnat_force_snat_addrs.n_ipv6_addrs) {
+ build_lrouter_force_snat_flows(lflows, od, "6",
+ od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s,
+ "dnat");
+ }
}
-
- build_lrouter_arp_flow(op->od, op,
- ip_address, REG_INPORT_ETH_ADDR,
- match, false, 90, NULL, lflows);
- }
-
- SSET_FOR_EACH (ip_address, &all_ips_v6) {
- ds_clear(match);
- if (op == op->od->l3dgw_port) {
- ds_put_format(match, "is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
+ if (lb_force_snat_ip) {
+ if (od->lb_force_snat_addrs.n_ipv4_addrs) {
+ build_lrouter_force_snat_flows(lflows, od, "4",
+ od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb");
+ }
+ if (od->lb_force_snat_addrs.n_ipv6_addrs) {
+ build_lrouter_force_snat_flows(lflows, od, "6",
+ od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
+ }
}
- build_lrouter_nd_flow(op->od, op, "nd_na",
- ip_address, NULL, REG_INPORT_ETH_ADDR,
- match, false, 90, NULL, lflows);
+ /* For gateway router, re-circulate every packet through
+ * the DNAT zone. This helps with the following.
+ *
+ * Any packet that needs to be unDNATed in the reverse
+ * direction gets unDNATed. Ideally this could be done in
+ * the egress pipeline. But since the gateway router
+ * does not have any feature that depends on the source
+ * ip address being external IP address for IP routing,
+ * we can do it here, saving a future re-circulation. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+ "ip", "flags.loopback = 1; ct_dnat;");
}
- sset_destroy(&all_ips_v4);
- sset_destroy(&all_ips_v6);
-
- if (!smap_get(&op->od->nbr->options, "chassis")
- && !op->od->l3dgw_port) {
- /* UDP/TCP port unreachable. */
- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
- ds_clear(match);
- ds_put_format(match,
- "ip4 && ip4.dst == %s && !ip.later_frag && udp",
- op->lrp_networks.ipv4_addrs[i].addr_s);
- const char *action = "icmp4 {"
- "eth.dst <-> eth.src; "
- "ip4.dst <-> ip4.src; "
- "ip.ttl = 255; "
- "icmp4.type = 3; "
- "icmp4.code = 3; "
- "next; };";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
- 80, ds_cstr(match), action,
- &op->nbrp->header_);
-
- ds_clear(match);
- ds_put_format(match,
- "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
- op->lrp_networks.ipv4_addrs[i].addr_s);
- action = "tcp_reset {"
- "eth.dst <-> eth.src; "
- "ip4.dst <-> ip4.src; "
- "next; };";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
- 80, ds_cstr(match), action,
- &op->nbrp->header_);
-
- ds_clear(match);
- ds_put_format(match,
- "ip4 && ip4.dst == %s && !ip.later_frag",
- op->lrp_networks.ipv4_addrs[i].addr_s);
- action = "icmp4 {"
- "eth.dst <-> eth.src; "
- "ip4.dst <-> ip4.src; "
- "ip.ttl = 255; "
- "icmp4.type = 3; "
- "icmp4.code = 2; "
- "next; };";
- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
- 70, ds_cstr(match), action,
- &op->nbrp->header_);
- }
+ /* Load balancing and packet defrag are only valid on
+ * Gateway routers or router with gateway port. */
+ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+ sset_destroy(&nat_entries);
+ return;
}
- /* Drop IP traffic destined to router owned IPs except if the IP is
- * also a SNAT IP. Those are dropped later, in stage
- * "lr_in_arp_resolve", if unSNAT was unsuccessful.
- *
- * Priority 60.
- */
- build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
- lflows);
+ /* A set to hold all ips that need defragmentation and tracking. */
+ struct sset all_ips = SSET_INITIALIZER(&all_ips);
- /* ARP / ND handling for external IP addresses.
- *
- * DNAT and SNAT IP addresses are external IP addresses that need ARP
- * handling.
- *
- * These are already taken care globally, per router. The only
- * exception is on the l3dgw_port where we might need to use a
- * different ETH address.
- */
- if (op != op->od->l3dgw_port) {
- return;
- }
+ for (int i = 0; i < od->nbr->n_load_balancer; i++) {
+ struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i];
+ struct ovn_northd_lb *lb =
+ ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
+ ovs_assert(lb);
- for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
- struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+ for (size_t j = 0; j < lb->n_vips; j++) {
+ struct ovn_lb_vip *lb_vip = &lb->vips[j];
+ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j];
+ ds_clear(actions);
+ build_lb_vip_actions(lb_vip, lb_vip_nb, actions,
+ lb->selection_fields, false);
- /* Skip entries we failed to parse. */
- if (!nat_entry_is_valid(nat_entry)) {
- continue;
- }
+ if (!sset_contains(&all_ips, lb_vip->vip_str)) {
+ sset_add(&all_ips, lb_vip->vip_str);
+ /* If there are any load balancing rules, we should send
+ * the packet to conntrack for defragmentation and
+ * tracking. This helps with two things.
+ *
+ * 1. With tracking, we can send only new connections to
+ * pick a DNAT ip address from a group.
+ * 2. If there are L4 ports in load balancing rules, we
+ * need the defragmentation to match on L4 ports. */
+ ds_clear(match);
+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+ ds_put_format(match, "ip && ip4.dst == %s",
+ lb_vip->vip_str);
+ } else {
+ ds_put_format(match, "ip && ip6.dst == %s",
+ lb_vip->vip_str);
+ }
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG,
+ 100, ds_cstr(match), "ct_next;",
+ &nb_lb->header_);
+ }
- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
- * below.
- */
- if (!strcmp(nat_entry->nb->type, "snat")) {
- continue;
- }
- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
- }
+ /* Higher priority rules are added for load-balancing in DNAT
+ * table. For every match (on a VIP[:port]), we add two flows
+ * via add_router_lb_flow(). One flow is for specific matching
+ * on ct.new with an action of "ct_lb($targets);". The other
+ * flow is for ct.est with an action of "ct_dnat;". */
+ ds_clear(match);
+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+ ds_put_format(match, "ip && ip4.dst == %s",
+ lb_vip->vip_str);
+ } else {
+ ds_put_format(match, "ip && ip6.dst == %s",
+ lb_vip->vip_str);
+ }
- /* Now handle SNAT entries too, one per unique SNAT IP. */
- struct shash_node *snat_snode;
- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
- struct ovn_snat_ip *snat_ip = snat_snode->data;
+ int prio = 110;
+ bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp");
+ bool is_sctp = nullable_string_is_equal(nb_lb->protocol,
+ "sctp");
+ const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp";
- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
- continue;
- }
+ if (lb_vip->vip_port) {
+ ds_put_format(match, " && %s && %s.dst == %d", proto,
+ proto, lb_vip->vip_port);
+ prio = 120;
+ }
- struct ovn_nat *nat_entry =
- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
- struct ovn_nat, ext_addr_list_node);
- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+ if (od->l3redirect_port &&
+ (lb_vip->n_backends || !lb_vip->empty_backend_rej)) {
+ ds_put_format(match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ add_router_lb_flow(lflows, od, match, actions, prio,
+ lb_force_snat_ip, lb_vip, proto,
+ nb_lb, meter_groups, &nat_entries);
+ }
}
+ sset_destroy(&all_ips);
+ sset_destroy(&nat_entries);
}
}
+
struct lswitch_flow_build_info {
struct hmap *datapaths;
struct hmap *ports;
@@ -11361,6 +11350,8 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
&lsi->actions);
build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows);
build_lrouter_arp_nd_for_datapath(od, lsi->lflows);
+ build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->meter_groups,
+ lsi->lbs, &lsi->match, &lsi->actions);
}
/* Helper function to combine all lflow generation which is iterated by port.
@@ -11459,9 +11450,6 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_destroy(&lsi.actions);
build_lswitch_flows(datapaths, lflows);
-
- /* Legacy lrouter build - to be migrated. */
- build_lrouter_flows(datapaths, lflows, meter_groups, lbs);
}
struct ovn_dp_group {
--
2.29.2