From dddd63bb72012f8b779e7935d3178cecf70620d0 Mon Sep 17 00:00:00 2001
From: Alfredo Moralejo
-- The DHCPv4 option code for this option is 66. --
--- The DHCPv4 option code for this option is 121. -@@ -2984,6 +2978,18 @@ -
--+ These options accept either an IPv4 address or a string value. -+
-+ -+-+ The DHCPv4 option code for this option is 66. -+
-+value: host_id
-+ This indicates that the value of the DHCP option is a host_id. -+ It can either be a host_name or an IP address. -+
-+ -+-+ Example. "name=tftp_server", "code=66", "type=host_id". -+
-+--ovn-controller-wrapper=WRAPPER
--ovn-ic-priority=NICE
--ovn-ic-wrapper=WRAPPER
--ovsdb-nb-wrapper=WRAPPER
--ovsdb-sb-wrapper=WRAPPER
--ovn-user=USER:GROUP
--ovs-user=USER:GROUP
-h
| --help
lr-policy-add
router priority
+- match action [nexthop]
++ --may-exist
]lr-policy-add
++ router priority match
++ action [nexthop]
+ [options key=value]]
+@@ -754,6 +755,13 @@
+ The supported option is : pkt_mark
.
+
++ If --may-exist
is specified, adding a duplicated
++ routing policy with the same priority and match string is not
++ really created. Without --may-exist
, adding a
++ duplicated routing policy results in error.
++
+ The following example shows a policy to lr1, which will drop packets
+ from192.168.100.0/24
.
+@@ -771,8 +779,8 @@
+
lr-policy-del
router [{priority | uuid}
+- [match]]--if-exists
] lr-policy-del
++ router [{priority | uuid} [match]]+ Deletes polices from router. If only router +@@ -784,7 +792,9 @@ + +
+ If router and uuid are supplied, then the
+- policy with sepcified uuid is deleted.
++ policy with sepcified uuid is deleted. It is an error if
++ uuid does not exist, unless --if-exists
++ is specified.
+
-+ These options accept string value which is a comma separated -+ list of domain names. The domain names are encoded based on RFC 1035. -+
-+ -+-+ The DHCPv4 option code for this option is 119. -+
-+value: domains
-+ This indicates that the value of the DHCP option is a domain name -+ or a comma separated list of domain names. -+
-+ -+-+ Example. "name=domain_search_list", "code=119", "type=domains". -+
-+-+ Marks the packet with the value specified when the router policy -+ is applied. CMS can inspect this packet marker and take some decisions -+ if desired. This value is not preserved when the packet goes out on the -+ wire. -+
-+xxreg0
xxreg1
inport
outport
flags.loopback
pkt.mark
eth.src
eth.dst
eth.type
vlan.tci
vlan.vid
vlan.pcp
vlan.present
ip.proto
ip.dscp
ip.ecn
ip.ttl
ip.frag
lr-policy-add
router priority
-- match action [nexthop]
- Add Policy to router which provides a way to configure
-@@ -732,6 +733,8 @@
- only when action is reroute. A policy is
- uniquely identified by priority and match.
- Multiple policies can have the same priority.
-+ options sets the router policy options as key-value pair.
-+ The supported option is : pkt_mark
.
-
-@@ -743,6 +746,12 @@
- lr-policy-add lr1 100 ip4.src == 192.168.100.0/24 drop
.
-
-+
-+ lr-policy-add lr1 100 ip4.src == 192.168.100.0/24 allow
-+ pkt_mark=100
-+
.
-+
lr-policy-del
router [{priority | uuid}
-diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
-index 159a44960..7578b9928 100644
---- a/utilities/ovn-nbctl.c
-+++ b/utilities/ovn-nbctl.c
-@@ -694,7 +694,8 @@ Route commands:\n\
- lr-route-list ROUTER print routes for ROUTER\n\
- \n\
- Policy commands:\n\
-- lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP]\n\
-+ lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \
-+[OPTIONS KEY=VALUE ...] \n\
- add a policy to router\n\
- lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\
- remove policies from ROUTER\n\
-@@ -3609,16 +3610,19 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
- const char *action = ctx->argv[4];
- char *next_hop = NULL;
-
-+ bool reroute = false;
- /* Validate action. */
- if (strcmp(action, "allow") && strcmp(action, "drop")
- && strcmp(action, "reroute")) {
- ctl_error(ctx, "%s: action must be one of \"allow\", \"drop\", "
- "and \"reroute\"", action);
-+ return;
- }
- if (!strcmp(action, "reroute")) {
- if (ctx->argc < 6) {
- ctl_error(ctx, "Nexthop is required when action is reroute.");
- }
-+ reroute = true;
- }
-
- /* Check if same routing policy already exists.
-@@ -3629,12 +3633,14 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
- !strcmp(policy->match, ctx->argv[3])) {
- ctl_error(ctx, "Same routing policy already existed on the "
- "logical router %s.", ctx->argv[1]);
-+ return;
- }
- }
-- if (ctx->argc == 6) {
-+ if (reroute) {
- next_hop = normalize_prefix_str(ctx->argv[5]);
- if (!next_hop) {
- ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]);
-+ return;
- }
- }
-
-@@ -3643,9 +3649,28 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
- nbrec_logical_router_policy_set_priority(policy, priority);
- nbrec_logical_router_policy_set_match(policy, ctx->argv[3]);
- nbrec_logical_router_policy_set_action(policy, action);
-- if (ctx->argc == 6) {
-+ if (reroute) {
- nbrec_logical_router_policy_set_nexthop(policy, next_hop);
- }
-+
-+ /* Parse the options. */
-+ struct smap options = SMAP_INITIALIZER(&options);
-+ for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) {
-+ char *key, *value;
-+ value = xstrdup(ctx->argv[i]);
-+ key = strsep(&value, "=");
-+ if (value && value[0]) {
-+ smap_add(&options, key, value);
-+ } else {
-+ ctl_error(ctx, "No value specified for the option : %s", key);
-+ free(key);
-+ return;
-+ }
-+ free(key);
-+ }
-+ nbrec_logical_router_policy_set_options(policy, &options);
-+ smap_destroy(&options);
-+
- nbrec_logical_router_verify_policies(lr);
- struct nbrec_logical_router_policy **new_policies
- = xmalloc(sizeof *new_policies * (lr->n_policies + 1));
-@@ -3773,6 +3798,16 @@ print_routing_policy(const struct nbrec_logical_router_policy *policy,
- ds_put_format(s, "%10"PRId64" %50s %15s", policy->priority,
- policy->match, policy->action);
- }
-+
-+ if (!smap_is_empty(&policy->options)) {
-+ ds_put_format(s, "%15s", "");
-+ struct smap_node *node;
-+ SMAP_FOR_EACH (node, &policy->options) {
-+ ds_put_format(s, "%s=%s,", node->key, node->value);
-+ }
-+ ds_chomp(s, ',');
-+ }
-+
- ds_put_char(s, '\n');
- }
-
-@@ -3788,7 +3823,7 @@ nbctl_lr_policy_list(struct ctl_context *ctx)
- return;
- }
- policies = xmalloc(sizeof *policies * lr->n_policies);
-- for (int i = 0; i < lr->n_policies; i++) {
-+ for (int i = 0; i < lr->n_policies; i++) {
- const struct nbrec_logical_router_policy *policy
- = lr->policies[i];
- policies[n_policies].priority = policy->priority;
-@@ -6362,8 +6397,9 @@ static const struct ctl_command_syntax nbctl_commands[] = {
- "", RO },
-
- /* Policy commands */
-- { "lr-policy-add", 4, 5, "ROUTER PRIORITY MATCH ACTION [NEXTHOP]", NULL,
-- nbctl_lr_policy_add, NULL, "", RW },
-+ { "lr-policy-add", 4, INT_MAX,
-+ "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]",
-+ NULL, nbctl_lr_policy_add, NULL, "", RW },
- { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]", NULL,
- nbctl_lr_policy_del, NULL, "", RW },
- { "lr-policy-list", 1, 1, "ROUTER", NULL, nbctl_lr_policy_list, NULL,
---
-2.26.2
-
diff --git a/SOURCES/0004-ovsdb-idl-Add-function-to-reset-min_index.patch b/SOURCES/0004-ovsdb-idl-Add-function-to-reset-min_index.patch
new file mode 100644
index 0000000..656edca
--- /dev/null
+++ b/SOURCES/0004-ovsdb-idl-Add-function-to-reset-min_index.patch
@@ -0,0 +1,68 @@
+From 1de31c3a531f5db6793819fa18f6e69304db929c Mon Sep 17 00:00:00 2001
+From: Mark Michelson icmp6 { action;
... };
icmp6_error { action;
... };
-+ - Temporarily replaces the IPv6 packet being processed by an ICMPv6 -@@ -2112,6 +2115,11 @@ -
icmp6.code = 1
(administratively prohibited)
-+ icmp6_error
action is expected to be used to
-+ generate an ICMPv6 packet in response to an error in original
-+ IPv6 packet.
-+
Prerequisite: ip6
icmp4.frag_mtu
-+ icmp6.frag_mtu
- -- This field sets the low-order 16 bits of the ICMP4 header field -- that is labelled "unused" in the ICMP specification as defined -- in the RFC 1191 with the value specified in -+ This field sets the low-order 16 bits of the ICMP{4,6} header -+ field that is labelled "unused" in the ICMP specification as -+ defined in the RFC 1191 with the value specified in - constant. -
- -diff --git a/tests/ovn.at b/tests/ovn.at -index cfcfa0915..80cd62c49 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -1557,6 +1557,14 @@ icmp6_error { }; - encodes as controller(userdata=00.00.00.14.00.00.00.00) - has prereqs ip6 - -+# icmp6_error with icmp6.frag_mtu -+icmp6_error { eth.dst = ff:ff:ff:ff:ff:ff; icmp6.frag_mtu = 1500; output; }; output; -+ encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.10.00.00.00.15.00.00.00.00.00.00.05.dc.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) -+ has prereqs ip6 -+ -+icmp6.frag_mtu = 1500; -+ encodes as controller(userdata=00.00.00.15.00.00.00.00.00.00.05.dc,pause) -+ - # tcp_reset - tcp_reset { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output; - encodes as controller(userdata=00.00.00.0b.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) -diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index c34517aaa..50a32b714 100644 ---- a/utilities/ovn-trace.c -+++ b/utilities/ovn-trace.c -@@ -2119,7 +2119,12 @@ execute_ovnfield_load(const struct ovnact_load *load, - ntohs(load->imm.value.be16_int)); - break; - } -- -+ case OVN_ICMP6_FRAG_MTU: { -+ ovntrace_node_append(super, OVNTRACE_NODE_MODIFY, -+ "icmp6.frag_mtu = %u", -+ ntohs(load->imm.value.be16_int)); -+ break; -+ } - case OVN_FIELD_N_IDS: - default: - OVS_NOT_REACHED(); --- -2.26.2 - diff --git a/SOURCES/0008-northd-introduce-icmp6_error-logical-flows-in-router.patch b/SOURCES/0008-northd-introduce-icmp6_error-logical-flows-in-router.patch deleted file mode 100644 index 246780e..0000000 --- a/SOURCES/0008-northd-introduce-icmp6_error-logical-flows-in-router.patch +++ /dev/null @@ -1,263 +0,0 @@ -From a1a0c7061850d78edb74a7977d0241121575be0e Mon Sep 17 00:00:00 2001 -From: Lorenzo Bianconioptions:gateway_mtu
to a valid integer value, this
- table adds the following priority-50 logical flow for each
-- logical router port with the match ip4 &&
-- inport == LRP && outport == GW_PORT
-- && REGBIT_PKT_LARGER
, where LRP is the logical
-- router port and GW_PORT is the distributed gateway router port
-- and applies the following action
-+ logical router port with the match inport == LRP
-+ && outport == GW_PORT &&
-+ REGBIT_PKT_LARGER
, where LRP is the logical
-+ router port and GW_PORT is the distributed gateway router
-+ port and applies the following action for ipv4 and ipv6 respectively:
-
-
- -@@ -2941,6 +2941,18 @@ icmp4 { - REGBIT_EGRESS_LOOPBACK = 1; - next(pipeline=ingress, table=0); - }; -+ -+icmp6 { -+ icmp6.type = 2; -+ icmp6.code = 0; -+ icmp6.frag_mtu = M; -+ eth.dst = E; -+ ip6.dst = ip6.src; -+ ip6.src = I; -+ ip.ttl = 255; -+ REGBIT_EGRESS_LOOPBACK = 1; -+ next(pipeline=ingress, table=0); -+}; -- -
ct_commit;
ct_commit(ct_mark=value[/mask]);
ct_commit(ct_label=value[/mask]);
ct_commit(ct_mark=value[/mask], ct_label=value[/mask]);
ct_commit { };
ct_commit { ct_mark=value[/mask]; };
ct_commit { ct_label=value[/mask]; };
ct_commit { ct_mark=value[/mask]; ct_label=value[/mask]; };
- Commit the flow to the connection tracking entry associated with it
-@@ -1276,6 +1276,9 @@
- tracking entry. ct_mark
is a 32-bit field.
- ct_label
is a 128-bit field. The value[/mask]
- should be specified in hex string if more than 64bits are to be used.
-+ Registers and other named fields can be used for value.
-+ ct_mark
and ct_label
may be sub-addressed
-+ in order to have specific bits set.
-
-diff --git a/tests/ovn.at b/tests/ovn.at
-index 905112a8d..4c68b77d8 100644
---- a/tests/ovn.at
-+++ b/tests/ovn.at
-@@ -1045,51 +1045,60 @@ ct_next;
- has prereqs ip
-
- # ct_commit
--ct_commit;
-+ct_commit { };
-+ formats as ct_commit { drop; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15])
- has prereqs ip
--ct_commit();
-- formats as ct_commit;
-- encodes as ct(commit,zone=NXM_NX_REG13[0..15])
-- has prereqs ip
--ct_commit(ct_mark=1);
-- formats as ct_commit(ct_mark=0x1);
-+ct_commit { ct_mark=1; };
-+ formats as ct_commit { ct_mark = 1; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1->ct_mark))
- has prereqs ip
--ct_commit(ct_mark=1/1);
-- formats as ct_commit(ct_mark=0x1/0x1);
-+ct_commit { ct_mark=1/1; };
-+ formats as ct_commit { ct_mark = 1/1; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1/0x1->ct_mark))
- has prereqs ip
--ct_commit(ct_label=1);
-- formats as ct_commit(ct_label=0x1);
-+ct_commit { ct_label=1; };
-+ formats as ct_commit { ct_label = 1; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1->ct_label))
- has prereqs ip
--ct_commit(ct_label=1/1);
-- formats as ct_commit(ct_label=0x1/0x1);
-+ct_commit { ct_label=1/1; };
-+ formats as ct_commit { ct_label = 1/1; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1/0x1->ct_label))
- has prereqs ip
--ct_commit(ct_mark=1, ct_label=2);
-- formats as ct_commit(ct_mark=0x1, ct_label=0x2);
-+ct_commit { ct_mark=1; ct_label=2; };
-+ formats as ct_commit { ct_mark = 1; ct_label = 2; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1->ct_mark,set_field:0x2->ct_label))
- has prereqs ip
-
--ct_commit(ct_label=0x01020304050607080910111213141516);
-- formats as ct_commit(ct_label=0x1020304050607080910111213141516);
-+ct_commit { ct_label=0x01020304050607080910111213141516; };
-+ formats as ct_commit { ct_label = 0x1020304050607080910111213141516; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1020304050607080910111213141516->ct_label))
- has prereqs ip
--ct_commit(ct_label=0x181716151413121110090807060504030201);
-- formats as ct_commit(ct_label=0x16151413121110090807060504030201);
-- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x16151413121110090807060504030201->ct_label))
-- has prereqs ip
--ct_commit(ct_label=0x1000000000000000000000000000000/0x1000000000000000000000000000000);
-+ct_commit { ct_label=0x1000000000000000000000000000000/0x1000000000000000000000000000000; };
-+ formats as ct_commit { ct_label = 0x1000000000000000000000000000000/0x1000000000000000000000000000000; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1000000000000000000000000000000/0x1000000000000000000000000000000->ct_label))
- has prereqs ip
--ct_commit(ct_label=18446744073709551615);
-- formats as ct_commit(ct_label=0xffffffffffffffff);
-+ct_commit { ct_label=18446744073709551615; };
-+ formats as ct_commit { ct_label = 18446744073709551615; };
- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0xffffffffffffffff->ct_label))
- has prereqs ip
--ct_commit(ct_label=18446744073709551616);
-+ct_commit { ct_label[0..47] = 0x00000f040201; ct_label[48..63] = 0x0002; };
-+ formats as ct_commit { ct_label[0..47] = 0xf040201; ct_label[48..63] = 0x2; };
-+ encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0xf040201/0xffffffffffff->ct_label,set_field:0x2000000000000/0xffff000000000000->ct_label))
-+ has prereqs ip
-+ct_commit { ct_label=18446744073709551616; };
- Decimal constants must be less than 2**64.
-+ct_commit { ct_label=0x181716151413121110090807060504030201; };
-+ 141-bit constant is not compatible with 128-bit field ct_label.
-+ct_commit { ip4.dst = 192.168.0.1; };
-+ Field ip4.dst is not modifiable.
-+
-+ct_mark = 12345
-+ Field ct_mark is not modifiable.
-+ct_label = 0xcafe
-+ Field ct_label is not modifiable.
-+ct_label.blocked = 1/1
-+ Field ct_label.blocked is not modifiable.
-
- # ct_dnat
- ct_dnat;
---
-2.26.2
-
diff --git a/SOURCES/0013-Add-ECMP-symmetric-replies.patch b/SOURCES/0013-Add-ECMP-symmetric-replies.patch
deleted file mode 100644
index 6a0a858..0000000
--- a/SOURCES/0013-Add-ECMP-symmetric-replies.patch
+++ /dev/null
@@ -1,922 +0,0 @@
-From 750e47ec508977af7bb37e9d0c98dd13984e9002 Mon Sep 17 00:00:00 2001
-From: Mark Michelson
- This is to send packets to connection tracker for tracking and
- defragmentation. It contains a priority-0 flow that simply moves traffic
-- to the next table. If load balancing rules with virtual IP addresses
-- (and ports) are configured in
-+ If load balancing rules with virtual IP addresses (and ports) are
-+ configured in
-+ If ECMP routes with symmetric reply are configured in the
-+ OVN_Northbound
database for a
-- Gateway router, a priority-100 flow is added for each configured virtual
-- IP address VIP. For IPv4 VIPs the flow matches
-- ip && ip4.dst == VIP
. For IPv6
-- VIPs, the flow matches ip && ip6.dst ==
-- VIP
. The flow uses the action ct_next;
-- to send IP packets to the connection tracker for packet de-fragmentation
-- and tracking before sending it to the next table.
-+ to the next table.
-+ OVN_Northbound
database for a Gateway router,
-+ a priority-100 flow is added for each configured virtual IP address
-+ VIP. For IPv4 VIPs the flow matches ip
-+ && ip4.dst == VIP
. For IPv6 VIPs,
-+ the flow matches ip && ip6.dst == VIP
.
-+ The flow uses the action ct_next;
to send IP packets to the
-+ connection tracker for packet de-fragmentation and tracking before
-+ sending it to the next table.
-+ OVN_Northbound
database for a gateway router, a priority-100
-+ flow is added for each router port on which symmetric replies are
-+ configured. The matching logic for these ports essentially reverses the
-+ configured logic of the ECMP route. So for instance, a route with a
-+ destination routing policy will instead match if the source IP address
-+ matches the static route's prefix. The flow uses the action
-+ ct_next
to send IP packets to the connection tracker for
-+ packet de-fragmentation and tracking before sending it to the next table.
- Ingress Table 5: UNSNAT
-@@ -2544,7 +2560,15 @@ output;
- table. This table, instead, is responsible for determine the ECMP
- group id and select a member id within the group based on 5-tuple
- hashing. It stores group id in reg8[0..15]
and member id in
-- reg8[16..31]
.
-+ reg8[16..31]
. This step is skipped if the traffic going
-+ out the ECMP route is reply traffic, and the ECMP route was configured
-+ to use symmetric replies. Instead, the stored ct_label
value
-+ is used to choose the destination. The least significant 48 bits of the
-+ ct_label
tell the destination MAC address to which the
-+ packet should be sent. The next 16 bits tell the logical router port on
-+ which the packet should be sent. These values in the
-+ ct_label
are set when the initial ingress traffic is
-+ received over the ECMP route.
-
-@@ -2694,6 +2718,11 @@ select(reg8[16..31], MID1, MID2, ...);
- address and reg1
as the source protocol address).
-
-+ This processing is skipped for reply traffic being sent out of an ECMP -+ route if the route was configured to use symmetric replies. -+
-+ -- This table contains the following logical flows: -
-diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 44e7d9365..cb8e25bdf 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -171,16 +171,17 @@ enum ovn_stage { - PIPELINE_STAGE(ROUTER, IN, DEFRAG, 4, "lr_in_defrag") \ - PIPELINE_STAGE(ROUTER, IN, UNSNAT, 5, "lr_in_unsnat") \ - PIPELINE_STAGE(ROUTER, IN, DNAT, 6, "lr_in_dnat") \ -- PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 7, "lr_in_nd_ra_options") \ -- PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 8, "lr_in_nd_ra_response") \ -- PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 9, "lr_in_ip_routing") \ -- PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 10, "lr_in_ip_routing_ecmp") \ -- PIPELINE_STAGE(ROUTER, IN, POLICY, 11, "lr_in_policy") \ -- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 12, "lr_in_arp_resolve") \ -- PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 13, "lr_in_chk_pkt_len") \ -- PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 14,"lr_in_larger_pkts") \ -- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 15, "lr_in_gw_redirect") \ -- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 16, "lr_in_arp_request") \ -+ PIPELINE_STAGE(ROUTER, IN, ECMP_STATEFUL, 7, "lr_in_ecmp_stateful") \ -+ PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 8, "lr_in_nd_ra_options") \ -+ PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 9, "lr_in_nd_ra_response") \ -+ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \ -+ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \ -+ PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \ -+ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 13, "lr_in_arp_resolve") \ -+ PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 14, "lr_in_chk_pkt_len") \ -+ PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 15,"lr_in_larger_pkts") \ -+ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 16, "lr_in_gw_redirect") \ -+ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 17, "lr_in_arp_request") \ - \ - /* Logical router egress stages. */ \ - PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ -@@ -7430,6 +7431,7 @@ struct parsed_route { - bool is_src_route; - uint32_t hash; - const struct nbrec_logical_router_static_route *route; -+ bool ecmp_symmetric_reply; - }; - - static uint32_t -@@ -7491,6 +7493,8 @@ parsed_routes_add(struct ovs_list *routes, - "src-ip")); - pr->hash = route_hash(pr); - pr->route = route; -+ pr->ecmp_symmetric_reply = smap_get_bool(&route->options, -+ "ecmp_symmetric_reply", false); - ovs_list_insert(routes, &pr->list_node); - return pr; - } -@@ -7739,18 +7743,95 @@ find_static_route_outport(struct ovn_datapath *od, struct hmap *ports, - return true; - } - -+static void -+add_ecmp_symmetric_reply_flows(struct hmap *lflows, -+ struct ovn_datapath *od, -+ const char *port_ip, -+ struct ovn_port *out_port, -+ const struct parsed_route *route, -+ struct ds *route_match) -+{ -+ const struct nbrec_logical_router_static_route *st_route = route->route; -+ struct ds match = DS_EMPTY_INITIALIZER; -+ struct ds actions = DS_EMPTY_INITIALIZER; -+ struct ds ecmp_reply = DS_EMPTY_INITIALIZER; -+ char *cidr = normalize_v46_prefix(&route->prefix, route->plen); -+ -+ /* If symmetric ECMP replies are enabled, then packets that arrive over -+ * an ECMP route need to go through conntrack. -+ */ -+ ds_put_format(&match, "inport == %s && ip%s.%s == %s", -+ out_port->json_key, -+ route->prefix.family == AF_INET ? "4" : "6", -+ route->is_src_route ? "dst" : "src", -+ cidr); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, -+ ds_cstr(&match), "ct_next;", -+ &st_route->header_); -+ -+ /* And packets that go out over an ECMP route need conntrack */ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, -+ ds_cstr(route_match), "ct_next;", -+ &st_route->header_); -+ -+ /* Save src eth and inport in ct_label for packets that arrive over -+ * an ECMP route. -+ * -+ * NOTE: we purposely are not clearing match before this -+ * ds_put_cstr() call. The previous contents are needed. -+ */ -+ ds_put_cstr(&match, " && (ct.new && !ct.est)"); -+ -+ ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src;" -+ " ct_label.ecmp_reply_port = %" PRId64 ";}; next;", -+ out_port->sb->tunnel_key); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, -+ ds_cstr(&match), ds_cstr(&actions), -+ &st_route->header_); -+ -+ /* Bypass ECMP selection if we already have ct_label information -+ * for where to route the packet. -+ */ -+ ds_put_format(&ecmp_reply, "ct.rpl && ct_label.ecmp_reply_port == %" -+ PRId64, out_port->sb->tunnel_key); -+ ds_clear(&match); -+ ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply), -+ ds_cstr(route_match)); -+ ds_clear(&actions); -+ ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; " -+ "eth.src = %s; %sreg1 = %s; outport = %s; next;", -+ out_port->lrp_networks.ea_s, -+ route->prefix.family == AF_INET ? "" : "xx", -+ port_ip, out_port->json_key); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 100, -+ ds_cstr(&match), ds_cstr(&actions), -+ &st_route->header_); -+ -+ /* Egress reply traffic for symmetric ECMP routes skips router policies. */ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY, 65535, -+ ds_cstr(&ecmp_reply), "next;", -+ &st_route->header_); -+ -+ ds_clear(&actions); -+ ds_put_cstr(&actions, "eth.dst = ct_label.ecmp_reply_eth; next;"); -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_RESOLVE, -+ 200, ds_cstr(&ecmp_reply), -+ ds_cstr(&actions), &st_route->header_); -+} -+ - static void - build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, - struct hmap *ports, struct ecmp_groups_node *eg) - - { - bool is_ipv4 = (eg->prefix.family == AF_INET); -- struct ds match = DS_EMPTY_INITIALIZER; - uint16_t priority; -+ struct ecmp_route_list_node *er; -+ struct ds route_match = DS_EMPTY_INITIALIZER; - - char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen); - build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4, -- &match, &priority); -+ &route_match, &priority); - free(prefix_s); - - struct ds actions = DS_EMPTY_INITIALIZER; -@@ -7758,7 +7839,6 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, - REG_ECMP_MEMBER_ID); - -- struct ecmp_route_list_node *er; - bool is_first = true; - LIST_FOR_EACH (er, list_node, &eg->route_list) { - if (is_first) { -@@ -7772,11 +7852,12 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, - ds_put_cstr(&actions, ");"); - - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, priority, -- ds_cstr(&match), ds_cstr(&actions)); -+ ds_cstr(&route_match), ds_cstr(&actions)); - - /* Add per member flow */ -+ struct ds match = DS_EMPTY_INITIALIZER; -+ struct sset visited_ports = SSET_INITIALIZER(&visited_ports); - LIST_FOR_EACH (er, list_node, &eg->route_list) { -- - const struct parsed_route *route_ = er->route; - const struct nbrec_logical_router_static_route *route = route_->route; - /* Find the outgoing port. */ -@@ -7786,6 +7867,15 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, - &out_port)) { - continue; - } -+ /* Symmetric ECMP reply is only usable on gateway routers. -+ * It is NOT usable on distributed routers with a gateway port. -+ */ -+ if (smap_get(&od->nbr->options, "chassis") && -+ route_->ecmp_symmetric_reply && sset_add(&visited_ports, -+ out_port->key)) { -+ add_ecmp_symmetric_reply_flows(lflows, od, lrp_addr_s, out_port, -+ route_, &route_match); -+ } - ds_clear(&match); - ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " - REG_ECMP_MEMBER_ID" == %"PRIu16, -@@ -7806,7 +7896,9 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, - ds_cstr(&match), ds_cstr(&actions), - &route->header_); - } -+ sset_destroy(&visited_ports); - ds_destroy(&match); -+ ds_destroy(&route_match); - ds_destroy(&actions); - } - -@@ -9161,6 +9253,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); - - /* Send the IPv6 NS packets to next table. When ovn-controller - * generates IPv6 NS (for the action - nd_ns{}), the injected -diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml -index 246cebc19..b1a462933 100644 ---- a/ovn-architecture.7.xml -+++ b/ovn-architecture.7.xml -@@ -1210,11 +1210,12 @@ -