From c41499498db66fbe54155206a513e418d75f8d49 Mon Sep 17 00:00:00 2001
From: Numan Siddique <numans@ovn.org>
Date: Wed, 2 Dec 2020 23:57:56 +0530
Subject: [PATCH 5/7] northd: Add ECMP support to router policies.
A user can now add a policy like:
ovn-nbctl lr-policy-add <LR> 100 "ip4.src == 10.0.0.4" reroute 172.0.0.5,172.0.0.6
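With such a policy ovn-northd programs, roughly (the group/member ids
below are illustrative; the exact flows are in the ovn-northd.at test
added by this patch):
  lr_in_policy:       reg8[0..15] = GID; reg8[16..31] = select(1, 2);
  lr_in_policy_ecmp:  reg8[0..15] == GID && reg8[16..31] == 1, reroute to 172.0.0.5
  lr_in_policy_ecmp:  reg8[0..15] == GID && reg8[16..31] == 2, reroute to 172.0.0.6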
We already have ECMP support for logical router static routes, but
since policies are applied after the routing stage, ovn-kubernetes
needs ECMP support for router policies as well.
A new column 'nexthops' is added to the Logical_Router_Policy table
instead of modifying the existing column 'nexthop' to preserve
backward compatibility and avoid any upgrade issues.
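For single-nexthop reroute policies the backward compatibility boils
down to preferring the new column and falling back to the old one,
i.e. (the gist of the build_routing_policy_flow() change below):
  /* Prefer the new 'nexthops' column; fall back to the deprecated
   * 'nexthop' column so existing databases keep working. */
  char *nexthop =
      (rule->n_nexthops == 1 ? rule->nexthops[0] : rule->nexthop);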
Change-Id: Ib5723d1de30f0ad86ee740bb4e3b593f1cca98eb
Requested-by: Alexander Constantinescu <aconstan@redhat.com>
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1881826
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
---
northd/ovn-northd.8.xml | 80 +++++++++++++++++++--
northd/ovn-northd.c | 148 ++++++++++++++++++++++++++++++++++----
ovn-nb.ovsschema | 6 +-
ovn-nb.xml | 18 ++++-
tests/ovn-northd.at | 124 ++++++++++++++++++++++++++++++++
tests/ovn.at | 16 ++---
utilities/ovn-nbctl.8.xml | 12 ++--
utilities/ovn-nbctl.c | 73 +++++++++++++++----
8 files changed, 429 insertions(+), 48 deletions(-)
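Note: a quick way to exercise the new path locally, mirroring the
ovn-northd.at test added below (the router and policy names are the
test's, nothing special about them):
  ovn-nbctl lr-policy-add lr0 10 "ip4.src == 10.0.0.3" reroute 172.168.0.101,172.168.0.102
  ovn-sbctl dump-flows lr0 | grep lr_in_policy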
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index d86f36ea6..1f0f71f34 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -3041,14 +3041,36 @@ outport = <var>P</var>;
<li>
<p>
- If the policy action is <code>reroute</code>, then the logical
- flow is added with the following actions:
+ If the policy action is <code>reroute</code> with 2 or more nexthops
+ defined, then the logical flow is added with the following actions:
+ </p>
+
+ <pre>
+reg8[0..15] = <var>GID</var>;
+reg8[16..31] = select(1,..n);
+ </pre>
+
+ <p>
+ where <var>GID</var> is the ECMP group id generated by
+ <code>ovn-northd</code> for this policy and <var>n</var>
+ is the number of nexthops. The <code>select</code> action
+ selects one of the nexthop member ids, stores it in the register
+ <code>reg8[16..31]</code>, and advances the packet to the
+ next stage.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ If the policy action is <code>reroute</code> with just one nexthop,
+ then the logical flow is added with the following actions:
</p>
<pre>
[xx]reg0 = <var>H</var>;
eth.src = <var>E</var>;
outport = <var>P</var>;
flags.loopback = 1;
+reg8[0..15] = 0;
next;
</pre>
@@ -3072,7 +3094,51 @@ next;
</li>
</ul>
- <h3>Ingress Table 13: ARP/ND Resolution</h3>
+ <h3>Ingress Table 13: ECMP handling for router policies</h3>
+ <p>
+ This table handles ECMP for the router policies configured
+ with multiple nexthops.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ A priority-150 flow is added to advance the packet to the next stage
+ if the ECMP group id register <code>reg8[0..15]</code> is 0.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ For each ECMP reroute router policy with multiple nexthops,
+ a priority-100 flow is added for each nexthop <var>H</var>
+ with the match <code>reg8[0..15] == <var>GID</var> &amp;&amp;
+ reg8[16..31] == <var>M</var></code> where <var>GID</var>
+ is the router policy group id generated by <code>ovn-northd</code>
+ and <var>M</var> is the member id of the nexthop <var>H</var>
+ generated by <code>ovn-northd</code>. The following actions are added
+ to the flow:
+ </p>
+
+ <pre>
+[xx]reg0 = <var>H</var>;
+eth.src = <var>E</var>;
+outport = <var>P</var>;
+flags.loopback = 1;
+next;
+ </pre>
+
+ <p>
+ where <var>H</var> is the <code>nexthop</code> defined in the
+ router policy, <var>E</var> is the Ethernet address of the
+ logical router port from which the <code>nexthop</code> is
+ reachable, and <var>P</var> is the logical router port
+ from which it is reachable.
+ </p>
+ </li>
+ </ul>
+
+ <h3>Ingress Table 14: ARP/ND Resolution</h3>
<p>
Any packet that reaches this table is an IP packet whose next-hop
@@ -3258,7 +3324,7 @@ next;
</ul>
- <h3>Ingress Table 14: Check packet length</h3>
+ <h3>Ingress Table 15: Check packet length</h3>
<p>
For distributed logical routers with distributed gateway port configured
@@ -3288,7 +3354,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(<var>L</var>); next;
and advances to the next table.
</p>
- <h3>Ingress Table 15: Handle larger packets</h3>
+ <h3>Ingress Table 16: Handle larger packets</h3>
<p>
For distributed logical routers with distributed gateway port configured
@@ -3349,7 +3415,7 @@ icmp6 {
and advances to the next table.
</p>
- <h3>Ingress Table 16: Gateway Redirect</h3>
+ <h3>Ingress Table 17: Gateway Redirect</h3>
<p>
For distributed logical routers where one of the logical router
@@ -3389,7 +3455,7 @@ icmp6 {
</li>
</ul>
- <h3>Ingress Table 17: ARP Request</h3>
+ <h3>Ingress Table 18: ARP Request</h3>
<p>
In the common case where the Ethernet destination has been resolved, this
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 478f1a339..dfd7d69d0 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -188,11 +188,12 @@ enum ovn_stage {
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \
PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \
- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 13, "lr_in_arp_resolve") \
- PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 14, "lr_in_chk_pkt_len") \
- PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 15,"lr_in_larger_pkts") \
- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 16, "lr_in_gw_redirect") \
- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 17, "lr_in_arp_request") \
+ PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 13, "lr_in_policy_ecmp") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 14, "lr_in_arp_resolve") \
+ PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 15, "lr_in_chk_pkt_len") \
+ PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 16, "lr_in_larger_pkts") \
+ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 17, "lr_in_gw_redirect") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 18, "lr_in_arp_request") \
\
/* Logical router egress stages. */ \
PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
@@ -7562,33 +7563,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
struct ds actions = DS_EMPTY_INITIALIZER;
if (!strcmp(rule->action, "reroute")) {
+ ovs_assert(rule->n_nexthops <= 1);
+
+ char *nexthop =
+ (rule->n_nexthops == 1 ? rule->nexthops[0] : rule->nexthop);
struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
- od, ports, rule->priority, rule->nexthop);
+ od, ports, rule->priority, nexthop);
if (!out_port) {
return;
}
- const char *lrp_addr_s = find_lrp_member_ip(out_port, rule->nexthop);
+ const char *lrp_addr_s = find_lrp_member_ip(out_port, nexthop);
if (!lrp_addr_s) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy "
" priority %"PRId64" nexthop %s",
- rule->priority, rule->nexthop);
+ rule->priority, nexthop);
return;
}
uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
if (pkt_mark) {
ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
}
- bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false;
+
+ bool is_ipv4 = strchr(nexthop, '.') ? true : false;
ds_put_format(&actions, "%s = %s; "
"%s = %s; "
"eth.src = %s; "
"outport = %s; "
"flags.loopback = 1; "
+ REG_ECMP_GROUP_ID" = 0; "
"next;",
is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
- rule->nexthop,
+ nexthop,
is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
lrp_addr_s,
out_port->lrp_networks.ea_s,
@@ -7601,7 +7608,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
if (pkt_mark) {
ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
}
- ds_put_cstr(&actions, "next;");
+ ds_put_cstr(&actions, REG_ECMP_GROUP_ID" = 0; next;");
}
ds_put_format(&match, "%s", rule->match);
@@ -7611,6 +7618,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
ds_destroy(&actions);
}
+static void
+build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
+ struct hmap *ports,
+ const struct nbrec_logical_router_policy *rule,
+ uint16_t ecmp_group_id)
+{
+ ovs_assert(rule->n_nexthops > 1);
+
+ bool nexthops_is_ipv4 = true;
+
+ /* Check that all the nexthops belong to the same addr family before
+ * adding logical flows. */
+ for (uint16_t i = 0; i < rule->n_nexthops; i++) {
+ bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false;
+
+ if (i == 0) {
+ nexthops_is_ipv4 = is_ipv4;
+ }
+
+ if (is_ipv4 != nexthops_is_ipv4) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "nexthop [%s] of the router policy with "
+ "the match [%s] does not belong to the same address "
+ "family as the other next hops",
+ rule->nexthops[i], rule->match);
+ return;
+ }
+ }
+
+ struct ds match = DS_EMPTY_INITIALIZER;
+ struct ds actions = DS_EMPTY_INITIALIZER;
+
+ for (uint16_t i = 0; i < rule->n_nexthops; i++) {
+ struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
+ od, ports, rule->priority, rule->nexthops[i]);
+ if (!out_port) {
+ goto cleanup;
+ }
+
+ const char *lrp_addr_s =
+ find_lrp_member_ip(out_port, rule->nexthops[i]);
+ if (!lrp_addr_s) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy "
+ " priority %"PRId64" nexthop %s",
+ rule->priority, rule->nexthops[i]);
+ goto cleanup;
+ }
+
+ ds_clear(&actions);
+ uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
+ if (pkt_mark) {
+ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
+ }
+
+ bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false;
+
+ ds_put_format(&actions, "%s = %s; "
+ "%s = %s; "
+ "eth.src = %s; "
+ "outport = %s; "
+ "flags.loopback = 1; "
+ "next;",
+ is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
+ rule->nexthops[i],
+ is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
+ lrp_addr_s,
+ out_port->lrp_networks.ea_s,
+ out_port->json_key);
+
+ ds_clear(&match);
+ ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && "
+ REG_ECMP_MEMBER_ID" == %"PRIu16,
+ ecmp_group_id, i + 1);
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY_ECMP,
+ 100, ds_cstr(&match),
+ ds_cstr(&actions), &rule->header_);
+ }
+
+ ds_clear(&actions);
+ ds_put_format(&actions, "%s = %"PRIu16
+ "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
+ REG_ECMP_MEMBER_ID);
+
+ for (uint16_t i = 0; i < rule->n_nexthops; i++) {
+ if (i > 0) {
+ ds_put_cstr(&actions, ", ");
+ }
+
+ ds_put_format(&actions, "%"PRIu16, i + 1);
+ }
+ ds_put_cstr(&actions, ");");
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
+ rule->priority, rule->match,
+ ds_cstr(&actions), &rule->header_);
+
+cleanup:
+ ds_destroy(&match);
+ ds_destroy(&actions);
+}
+
struct parsed_route {
struct ovs_list list_node;
struct in6_addr prefix;
@@ -10300,13 +10408,27 @@ build_ingress_policy_flows_for_lrouter(
if (od->nbr) {
/* This is a catch-all rule. It has the lowest priority (0)
* does a match-all("1") and pass-through (next) */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
+ REG_ECMP_GROUP_ID" = 0; next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
+ REG_ECMP_GROUP_ID" == 0", "next;");
/* Convert routing policies to flows. */
+ uint16_t ecmp_group_id = 1;
for (int i = 0; i < od->nbr->n_policies; i++) {
const struct nbrec_logical_router_policy *rule
= od->nbr->policies[i];
- build_routing_policy_flow(lflows, od, ports, rule, &rule->header_);
+ bool is_ecmp_reroute =
+ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
+
+ if (is_ecmp_reroute) {
+ build_ecmp_routing_policy_flows(lflows, od, ports, rule,
+ ecmp_group_id);
+ ecmp_group_id++;
+ } else {
+ build_routing_policy_flow(lflows, od, ports, rule,
+ &rule->header_);
+ }
}
}
}
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index af77dd138..b77a2308c 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
- "version": "5.29.0",
- "cksum": "328602112 27064",
+ "version": "5.30.0",
+ "cksum": "3273824429 27172",
"tables": {
"NB_Global": {
"columns": {
@@ -391,6 +391,8 @@
"key": {"type": "string",
"enum": ["set", ["allow", "drop", "reroute"]]}}},
"nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
+ "nexthops": {"type": {
+ "key": "string", "min": 0, "max": "unlimited"}},
"options": {
"type": {"key": "string", "value": "string",
"min": 0, "max": "unlimited"}},
diff --git a/ovn-nb.xml b/ovn-nb.xml
index e7a8d6833..0cf043790 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -2723,18 +2723,34 @@
</li>
<li>
- <code>reroute</code>: Reroute packet to <ref column="nexthop"/>.
+ <code>reroute</code>: Reroute packet to <ref column="nexthop"/> or
+ <ref column="nexthops"/>.
</li>
</ul>
</column>
<column name="nexthop">
+ <p>
+ Note: This column is deprecated in favor of <ref column="nexthops"/>.
+ </p>
<p>
Next-hop IP address for this route, which should be the IP
address of a connected router port or the IP address of a logical port.
</p>
</column>
+ <column name="nexthops">
+ <p>
+ Next-hop ECMP IP addresses for this route. Each IP in the list should
+ be the IP address of a connected router port or the IP address of a
+ logical port.
+ </p>
+
+ <p>
+ One IP from the list is selected as the next hop.
+ </p>
+ </column>
+
<column name="options" key="pkt_mark">
<p>
Marks the packet with the value specified when the router policy
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 50a4cae76..ce6c44db4 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -2198,3 +2198,127 @@ dnl Number of common flows should be the same.
check_row_count Logical_Flow ${n_flows_common} logical_dp_group=${dp_group_uuid}
AT_CLEANUP
+
+AT_SETUP([ovn -- Router policies - ECMP reroute])
+AT_KEYWORDS([router policies ecmp reroute])
+ovn_start
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl lsp-add sw0 sw0-port1
+check ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3"
+
+check ovn-nbctl ls-add sw1
+check ovn-nbctl lsp-add sw1 sw1-port1
+check ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3"
+
+# Create a logical router and attach both logical switches
+check ovn-nbctl lr-add lr0
+check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+check ovn-nbctl lsp-add sw0 sw0-lr0
+check ovn-nbctl lsp-set-type sw0-lr0 router
+check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01
+check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+check ovn-nbctl lsp-add sw1 sw1-lr0
+check ovn-nbctl lsp-set-type sw1-lr0 router
+check ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02
+check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+check ovn-nbctl ls-add public
+check ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24
+check ovn-nbctl lsp-add public public-lr0
+check ovn-nbctl lsp-set-type public-lr0 router
+check ovn-nbctl lsp-set-addresses public-lr0 router
+check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
+
+check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.3" reroute 172.168.0.101,172.168.0.102
+
+ovn-sbctl dump-flows lr0 > lr0flows3
+AT_CAPTURE_FILE([lr0flows3])
+
+AT_CHECK([grep "lr_in_policy" lr0flows3 | sort], [0], [dnl
+ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = 0; next;)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == 0), action=(next;)
+])
+
+check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.4" reroute 172.168.0.101,172.168.0.102,172.168.0.103
+ovn-sbctl dump-flows lr0 > lr0flows3
+AT_CAPTURE_FILE([lr0flows3])
+
+AT_CHECK([grep "lr_in_policy" lr0flows3 | \
+sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
+sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
+ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2);)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2, 3);)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == <cleared>), action=(next;)
+])
+
+check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.5" reroute 172.168.0.110
+ovn-sbctl dump-flows lr0 > lr0flows3
+AT_CAPTURE_FILE([lr0flows3])
+
+AT_CHECK([grep "lr_in_policy" lr0flows3 | \
+sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
+sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
+ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2);)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2, 3);)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = <cleared>; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == <cleared>), action=(next;)
+])
+
+check ovn-nbctl --wait=sb lr-policy-del lr0 10 "ip4.src == 10.0.0.3"
+ovn-sbctl dump-flows lr0 > lr0flows3
+AT_CAPTURE_FILE([lr0flows3])
+
+AT_CHECK([grep "lr_in_policy" lr0flows3 | \
+sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
+sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
+ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2, 3);)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = <cleared>; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == <cleared>), action=(next;)
+])
+
+check ovn-nbctl --wait=sb lr-policy-del lr0 10 "ip4.src == 10.0.0.4"
+ovn-sbctl dump-flows lr0 > lr0flows3
+AT_CAPTURE_FILE([lr0flows3])
+
+AT_CHECK([grep "lr_in_policy" lr0flows3 | \
+sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
+sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
+ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
+ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = <cleared>; next;)
+ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == <cleared>), action=(next;)
+])
+
+check ovn-nbctl --wait=sb add logical_router_policy . nexthops "2000\:\:b"
+ovn-sbctl dump-flows lr0 > lr0flows3
+AT_CAPTURE_FILE([lr0flows3])
+
+AT_CHECK([grep "lr_in_policy" lr0flows3 | \
+sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
+sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
+ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
+ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == <cleared>), action=(next;)
+])
+
+AT_CLEANUP
diff --git a/tests/ovn.at b/tests/ovn.at
index 2e0bc9c53..66088a7f5 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -21156,31 +21156,31 @@ AT_CHECK([
AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
table=12(lr_in_policy ), priority=1001 , dnl
-match=(ip6), action=(pkt.mark = 4294967295; next;)
+match=(ip6), action=(pkt.mark = 4294967295; reg8[[0..15]] = 0; next;)
])
ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=-1
AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
table=12(lr_in_policy ), priority=1001 , dnl
-match=(ip6), action=(next;)
+match=(ip6), action=(reg8[[0..15]] = 0; next;)
])
ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=2147483648
AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
table=12(lr_in_policy ), priority=1001 , dnl
-match=(ip6), action=(pkt.mark = 2147483648; next;)
+match=(ip6), action=(pkt.mark = 2147483648; reg8[[0..15]] = 0; next;)
])
ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=foo
AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
table=12(lr_in_policy ), priority=1001 , dnl
-match=(ip6), action=(next;)
+match=(ip6), action=(reg8[[0..15]] = 0; next;)
])
ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=4294967296
AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
table=12(lr_in_policy ), priority=1001 , dnl
-match=(ip6), action=(next;)
+match=(ip6), action=(reg8[[0..15]] = 0; next;)
])
OVN_CLEANUP([hv1])
@@ -21759,7 +21759,7 @@ AT_CHECK([
grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c)
])
AT_CHECK([
- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=21 | \
+ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=22 | \
grep "priority=200" | \
grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c)
])
@@ -21770,7 +21770,7 @@ AT_CHECK([
grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c)
])
AT_CHECK([
- test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=21 | \
+ test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=22 | \
grep "priority=200" | \
grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c)
])
@@ -22208,7 +22208,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep "actions=controller" | grep
])
# The packet should've been dropped in the lr_in_arp_resolve stage.
-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=21, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=22, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl
1
])
diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index e5a35f307..e6fec9980 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -739,7 +739,7 @@
<dl>
<dt>[<code>--may-exist</code>]<code>lr-policy-add</code>
<var>router</var> <var>priority</var> <var>match</var>
- <var>action</var> [<var>nexthop</var>]
+ <var>action</var> [<var>nexthop</var>[,<var>nexthop</var>,...]]
[<var>options key=value]</var>] </dt>
<dd>
<p>
@@ -748,10 +748,12 @@
are similar to OVN ACLs, but exist on the logical-router. Reroute
policies are needed for service-insertion and service-chaining.
<var>nexthop</var> is an optional parameter. It needs to be provided
- only when <var>action</var> is <var>reroute</var>. A policy is
- uniquely identified by <var>priority</var> and <var>match</var>.
- Multiple policies can have the same <var>priority</var>.
- <var>options</var> sets the router policy options as key-value pair.
+ only when <var>action</var> is <var>reroute</var>. Multiple
+ comma-separated <var>nexthop</var>s can be specified for ECMP
+ routing. A policy is uniquely identified by <var>priority</var>
+ and <var>match</var>. Multiple policies can have the same
+ <var>priority</var>. <var>options</var> sets the router policy
+ options as key-value pairs.
The supported option is : <code>pkt_mark</code>.
</p>
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index 835161f25..94e7eedeb 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -766,7 +766,7 @@ Route commands:\n\
lr-route-list ROUTER print routes for ROUTER\n\
\n\
Policy commands:\n\
- lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \
+ lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP[,NEXTHOP,...]] \
[OPTIONS KEY=VALUE ...] \n\
add a policy to router\n\
lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\
@@ -3634,7 +3634,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
return;
}
const char *action = ctx->argv[4];
- char *next_hop = NULL;
+ size_t n_nexthops = 0;
+ char **nexthops = NULL;
bool reroute = false;
/* Validate action. */
@@ -3654,7 +3655,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
/* Check if same routing policy already exists.
* A policy is uniquely identified by priority and match */
bool may_exist = !!shash_find(&ctx->options, "--may-exist");
- for (int i = 0; i < lr->n_policies; i++) {
+ size_t i;
+ for (i = 0; i < lr->n_policies; i++) {
const struct nbrec_logical_router_policy *policy = lr->policies[i];
if (policy->priority == priority &&
!strcmp(policy->match, ctx->argv[3])) {
@@ -3665,12 +3667,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
return;
}
}
+
if (reroute) {
- next_hop = normalize_prefix_str(ctx->argv[5]);
- if (!next_hop) {
- ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]);
- return;
+ char *nexthops_arg = xstrdup(ctx->argv[5]);
+ char *save_ptr, *next_hop, *token;
+
+ n_nexthops = 0;
+ size_t n_allocs = 0;
+
+ bool nexthops_is_ipv4 = true;
+ for (token = strtok_r(nexthops_arg, ",", &save_ptr);
+ token != NULL; token = strtok_r(NULL, ",", &save_ptr)) {
+ next_hop = normalize_addr_str(token);
+
+ if (!next_hop) {
+ ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]);
+ free(nexthops_arg);
+ for (i = 0; i < n_nexthops; i++) {
+ free(nexthops[i]);
+ }
+ free(nexthops);
+ return;
+ }
+ if (n_nexthops == n_allocs) {
+ nexthops = x2nrealloc(nexthops, &n_allocs, sizeof *nexthops);
+ }
+
+ bool is_ipv4 = strchr(next_hop, '.') ? true : false;
+ if (n_nexthops == 0) {
+ nexthops_is_ipv4 = is_ipv4;
+ }
+
+ if (is_ipv4 != nexthops_is_ipv4) {
+ ctl_error(ctx, "bad next hops argument, not in the same "
+ "addr family : %s", ctx->argv[5]);
+ free(nexthops_arg);
+ free(next_hop);
+ for (i = 0; i < n_nexthops; i++) {
+ free(nexthops[i]);
+ }
+ free(nexthops);
+ return;
+ }
+ nexthops[n_nexthops] = next_hop;
+ n_nexthops++;
}
+
+ free(nexthops_arg);
}
struct nbrec_logical_router_policy *policy;
@@ -3679,12 +3722,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
nbrec_logical_router_policy_set_match(policy, ctx->argv[3]);
nbrec_logical_router_policy_set_action(policy, action);
if (reroute) {
- nbrec_logical_router_policy_set_nexthop(policy, next_hop);
+ nbrec_logical_router_policy_set_nexthops(
+ policy, (const char **)nexthops, n_nexthops);
}
/* Parse the options. */
struct smap options = SMAP_INITIALIZER(&options);
- for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) {
+ for (i = reroute ? 6 : 5; i < ctx->argc; i++) {
char *key, *value;
value = xstrdup(ctx->argv[i]);
key = strsep(&value, "=");
@@ -3694,7 +3738,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
ctl_error(ctx, "No value specified for the option : %s", key);
smap_destroy(&options);
free(key);
- free(next_hop);
+ for (i = 0; i < n_nexthops; i++) {
+ free(nexthops[i]);
+ }
+ free(nexthops);
return;
}
free(key);
@@ -3703,9 +3750,11 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
smap_destroy(&options);
nbrec_logical_router_update_policies_addvalue(lr, policy);
- if (next_hop != NULL) {
- free(next_hop);
+
+ for (i = 0; i < n_nexthops; i++) {
+ free(nexthops[i]);
}
+ free(nexthops);
}
static void
--
2.28.0