7f7b2c
From c41499498db66fbe54155206a513e418d75f8d49 Mon Sep 17 00:00:00 2001
7f7b2c
From: Numan Siddique <numans@ovn.org>
7f7b2c
Date: Wed, 2 Dec 2020 23:57:56 +0530
7f7b2c
Subject: [PATCH 5/7] northd: Add ECMP support to router policies.
7f7b2c
7f7b2c
A user can add a policy now like:
7f7b2c
7f7b2c
ovn-nbctl lr-policy-add <LR> 100 "ip4.src == 10.0.0.4" reroute 172.0.0.5,172.0.0.6
7f7b2c
7f7b2c
We do have ECMP support for logical router static routes, but since
7f7b2c
policies are applied after the routing stage, ECMP support for
7f7b2c
policies is desired by ovn-kubernetes.
7f7b2c
7f7b2c
A new column 'nexthops' is added to the Logical_Router_Policy table
7f7b2c
instead of modifying the existing column 'nexthop' to preserve
7f7b2c
backward compatibility and avoid any upgrade issues.
7f7b2c
7f7b2c
Change-Id: Ib5723d1de30f0ad86ee740bb4e3b593f1cca98eb
7f7b2c
Requested-by: Alexander Constantinescu <aconstan@redhat.com>
7f7b2c
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1881826
7f7b2c
Acked-by: Mark Michelson <mmichels@redhat.com>
7f7b2c
Signed-off-by: Numan Siddique <numans@ovn.org>
7f7b2c
---
7f7b2c
 northd/ovn-northd.8.xml   |  80 +++++++++++++++++++--
7f7b2c
 northd/ovn-northd.c       | 148 ++++++++++++++++++++++++++++++++++----
7f7b2c
 ovn-nb.ovsschema          |   6 +-
7f7b2c
 ovn-nb.xml                |  18 ++++-
7f7b2c
 tests/ovn-northd.at       | 124 ++++++++++++++++++++++++++++++++
7f7b2c
 tests/ovn.at              |  16 ++---
7f7b2c
 utilities/ovn-nbctl.8.xml |  12 ++--
7f7b2c
 utilities/ovn-nbctl.c     |  73 +++++++++++++++----
7f7b2c
 8 files changed, 429 insertions(+), 48 deletions(-)
7f7b2c
7f7b2c
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
7f7b2c
index d86f36ea6..1f0f71f34 100644
7f7b2c
--- a/northd/ovn-northd.8.xml
7f7b2c
+++ b/northd/ovn-northd.8.xml
7f7b2c
@@ -3041,14 +3041,36 @@ outport = P;
7f7b2c
 
7f7b2c
       
  • 7f7b2c
             

    7f7b2c
    -          If the policy action is reroute, then the logical
    7f7b2c
    -          flow is added with the following actions:
    7f7b2c
    +          If the policy action is reroute with 2 or more nexthops
    7f7b2c
    +          defined, then the logical flow is added with the following actions:
    7f7b2c
    +        

    7f7b2c
    +
    7f7b2c
    +         
    7f7b2c
    +reg8[0..15] = GID;
    7f7b2c
    +reg8[16..31] = select(1,..n);
    7f7b2c
    +        
    7f7b2c
    +
    7f7b2c
    +        

    7f7b2c
    +          where GID is the ECMP group id generated by
    7f7b2c
    +          ovn-northd for this policy and n
    7f7b2c
    +          is the number of nexthops. select action
    7f7b2c
    +          selects one of the nexthop member ids, stores it in the register
    7f7b2c
    +          reg8[16..31] and advances the packet to the
    7f7b2c
    +          next stage.
    7f7b2c
    +        

    7f7b2c
    +      
    7f7b2c
    +
    7f7b2c
    +      
  • 7f7b2c
    +        

    7f7b2c
    +          If the policy action is reroute with just one nexthop,
    7f7b2c
    +          then the logical flow is added with the following actions:
    7f7b2c
             

    7f7b2c
     
    7f7b2c
              
    7f7b2c
     [xx]reg0 = H;
    7f7b2c
     eth.src = E;
    7f7b2c
     outport = P;
    7f7b2c
    +reg8[0..15] = 0;
    7f7b2c
     flags.loopback = 1;
    7f7b2c
     next;
    7f7b2c
             
    7f7b2c
    @@ -3072,7 +3094,51 @@ next;
    7f7b2c
           
    7f7b2c
         
    7f7b2c
     
    7f7b2c
    -    

    Ingress Table 13: ARP/ND Resolution

    7f7b2c
    +    

    Ingress Table 13: ECMP handling for router policies

    7f7b2c
    +    

    7f7b2c
    +      This table handles the ECMP for the router policies configured
    7f7b2c
    +      with multiple nexthops.
    7f7b2c
    +    

    7f7b2c
    +
    7f7b2c
    +    
      7f7b2c
      +      
    • 7f7b2c
      +        

      7f7b2c
      +          A priority-150 flow is added to advance the packet to the next stage
      7f7b2c
      +          if the ECMP group id register reg8[0..15] is 0.
      7f7b2c
      +        

      7f7b2c
      +      
      7f7b2c
      +
      7f7b2c
      +      
    • 7f7b2c
      +        

      7f7b2c
      +          For each ECMP reroute router policy with multiple nexthops,
      7f7b2c
      +          a priority-100 flow is added for each nexthop H
      7f7b2c
      +          with the match reg8[0..15] == GID &&
      7f7b2c
      +          reg8[16..31] == M where GID
      7f7b2c
      +          is the router policy group id generated by ovn-northd
      7f7b2c
      +          and M is the member id of the nexthop H
      7f7b2c
      +          generated by ovn-northd. The following actions are added
      7f7b2c
      +          to the flow:
      7f7b2c
      +        

      7f7b2c
      +
      7f7b2c
      +        
      7f7b2c
      +[xx]reg0 = H;
      7f7b2c
      +eth.src = E;
      7f7b2c
      +outport = P;
      7f7b2c
      +flags.loopback = 1;
      7f7b2c
      +next;
      7f7b2c
      +        
      7f7b2c
      +
      7f7b2c
      +        

      7f7b2c
      +          where H is the nexthop defined in the
      7f7b2c
      +          router policy, E is the ethernet address of the
      7f7b2c
      +          logical router port from which the nexthop is
      7f7b2c
      +          reachable and P is the logical router port from
      7f7b2c
      +          which the nexthop is reachable.
      7f7b2c
      +        

      7f7b2c
      +      
      7f7b2c
      +    
      7f7b2c
      +
      7f7b2c
      +    

      Ingress Table 14: ARP/ND Resolution

      7f7b2c
       
      7f7b2c
           

      7f7b2c
             Any packet that reaches this table is an IP packet whose next-hop
      7f7b2c
      @@ -3258,7 +3324,7 @@ next;
      7f7b2c
       
      7f7b2c
           
      7f7b2c
       
      7f7b2c
      -    

      Ingress Table 14: Check packet length

      7f7b2c
      +    

      Ingress Table 15: Check packet length

      7f7b2c
       
      7f7b2c
           

      7f7b2c
             For distributed logical routers with distributed gateway port configured
      7f7b2c
      @@ -3288,7 +3354,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next;
      7f7b2c
             and advances to the next table.
      7f7b2c
           

      7f7b2c
       
      7f7b2c
      -    

      Ingress Table 15: Handle larger packets

      7f7b2c
      +    

      Ingress Table 16: Handle larger packets

      7f7b2c
       
      7f7b2c
           

      7f7b2c
             For distributed logical routers with distributed gateway port configured
      7f7b2c
      @@ -3349,7 +3415,7 @@ icmp6 {
      7f7b2c
             and advances to the next table.
      7f7b2c
           

      7f7b2c
       
      7f7b2c
      -    

      Ingress Table 16: Gateway Redirect

      7f7b2c
      +    

      Ingress Table 17: Gateway Redirect

      7f7b2c
       
      7f7b2c
           

      7f7b2c
             For distributed logical routers where one of the logical router
      7f7b2c
      @@ -3389,7 +3455,7 @@ icmp6 {
      7f7b2c
             
      7f7b2c
           
      7f7b2c
       
      7f7b2c
      -    

      Ingress Table 17: ARP Request

      7f7b2c
      +    

      Ingress Table 18: ARP Request

      7f7b2c
       
      7f7b2c
           

      7f7b2c
             In the common case where the Ethernet destination has been resolved, this
      7f7b2c
      diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
      7f7b2c
      index 478f1a339..dfd7d69d0 100644
      7f7b2c
      --- a/northd/ovn-northd.c
      7f7b2c
      +++ b/northd/ovn-northd.c
      7f7b2c
      @@ -188,11 +188,12 @@ enum ovn_stage {
      7f7b2c
           PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,      10, "lr_in_ip_routing")   \
      7f7b2c
           PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \
      7f7b2c
           PIPELINE_STAGE(ROUTER, IN,  POLICY,          12, "lr_in_policy")       \
      7f7b2c
      -    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,     13, "lr_in_arp_resolve")  \
      7f7b2c
      -    PIPELINE_STAGE(ROUTER, IN,  CHK_PKT_LEN   ,  14, "lr_in_chk_pkt_len")   \
      7f7b2c
      -    PIPELINE_STAGE(ROUTER, IN,  LARGER_PKTS,     15,"lr_in_larger_pkts")   \
      7f7b2c
      -    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,     16, "lr_in_gw_redirect")  \
      7f7b2c
      -    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,     17, "lr_in_arp_request")  \
      7f7b2c
      +    PIPELINE_STAGE(ROUTER, IN,  POLICY_ECMP,     13, "lr_in_policy_ecmp")  \
      7f7b2c
      +    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,     14, "lr_in_arp_resolve")  \
      7f7b2c
      +    PIPELINE_STAGE(ROUTER, IN,  CHK_PKT_LEN   ,  15, "lr_in_chk_pkt_len")  \
      7f7b2c
      +    PIPELINE_STAGE(ROUTER, IN,  LARGER_PKTS,     16, "lr_in_larger_pkts")  \
      7f7b2c
      +    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,     17, "lr_in_gw_redirect")  \
      7f7b2c
      +    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,     18, "lr_in_arp_request")  \
      7f7b2c
                                                                             \
      7f7b2c
           /* Logical router egress stages. */                               \
      7f7b2c
           PIPELINE_STAGE(ROUTER, OUT, UNDNAT,    0, "lr_out_undnat")        \
      7f7b2c
      @@ -7562,33 +7563,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
      7f7b2c
           struct ds actions = DS_EMPTY_INITIALIZER;
      7f7b2c
       
      7f7b2c
           if (!strcmp(rule->action, "reroute")) {
      7f7b2c
      +        ovs_assert(rule->n_nexthops <= 1);
      7f7b2c
      +
      7f7b2c
      +        char *nexthop =
      7f7b2c
      +            (rule->n_nexthops == 1 ? rule->nexthops[0] : rule->nexthop);
      7f7b2c
               struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
      7f7b2c
      -             od, ports, rule->priority, rule->nexthop);
      7f7b2c
      +             od, ports, rule->priority, nexthop);
      7f7b2c
               if (!out_port) {
      7f7b2c
                   return;
      7f7b2c
               }
      7f7b2c
       
      7f7b2c
      -        const char *lrp_addr_s = find_lrp_member_ip(out_port, rule->nexthop);
      7f7b2c
      +        const char *lrp_addr_s = find_lrp_member_ip(out_port, nexthop);
      7f7b2c
               if (!lrp_addr_s) {
      7f7b2c
                   static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
      7f7b2c
                   VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy "
      7f7b2c
                                " priority %"PRId64" nexthop %s",
      7f7b2c
      -                         rule->priority, rule->nexthop);
      7f7b2c
      +                         rule->priority, nexthop);
      7f7b2c
                   return;
      7f7b2c
               }
      7f7b2c
               uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
      7f7b2c
               if (pkt_mark) {
      7f7b2c
                   ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
      7f7b2c
               }
      7f7b2c
      -        bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false;
      7f7b2c
      +
      7f7b2c
      +        bool is_ipv4 = strchr(nexthop, '.') ? true : false;
      7f7b2c
               ds_put_format(&actions, "%s = %s; "
      7f7b2c
                             "%s = %s; "
      7f7b2c
                             "eth.src = %s; "
      7f7b2c
                             "outport = %s; "
      7f7b2c
                             "flags.loopback = 1; "
      7f7b2c
      +                      REG_ECMP_GROUP_ID" = 0; "
      7f7b2c
                             "next;",
      7f7b2c
                             is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
      7f7b2c
      -                      rule->nexthop,
      7f7b2c
      +                      nexthop,
      7f7b2c
                             is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
      7f7b2c
                             lrp_addr_s,
      7f7b2c
                             out_port->lrp_networks.ea_s,
      7f7b2c
      @@ -7601,7 +7608,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
      7f7b2c
               if (pkt_mark) {
      7f7b2c
                   ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
      7f7b2c
               }
      7f7b2c
      -        ds_put_cstr(&actions, "next;");
      7f7b2c
      +        ds_put_cstr(&actions, REG_ECMP_GROUP_ID" = 0; next;");
      7f7b2c
           }
      7f7b2c
           ds_put_format(&match, "%s", rule->match);
      7f7b2c
       
      7f7b2c
      @@ -7611,6 +7618,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
      7f7b2c
           ds_destroy(&actions);
      7f7b2c
       }
      7f7b2c
       
      7f7b2c
      +static void
      7f7b2c
      +build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
      7f7b2c
      +                                struct hmap *ports,
      7f7b2c
      +                                const struct nbrec_logical_router_policy *rule,
      7f7b2c
      +                                uint16_t ecmp_group_id)
      7f7b2c
      +{
      7f7b2c
      +    ovs_assert(rule->n_nexthops > 1);
      7f7b2c
      +
      7f7b2c
      +    bool nexthops_is_ipv4 = true;
      7f7b2c
      +
      7f7b2c
      +    /* Check that all the nexthops belong to the same addr family before
      7f7b2c
      +     * adding logical flows. */
      7f7b2c
      +    for (uint16_t i = 0; i < rule->n_nexthops; i++) {
      7f7b2c
      +        bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false;
      7f7b2c
      +
      7f7b2c
      +        if (i == 0) {
      7f7b2c
      +            nexthops_is_ipv4 = is_ipv4;
      7f7b2c
      +        }
      7f7b2c
      +
      7f7b2c
      +        if (is_ipv4 != nexthops_is_ipv4) {
      7f7b2c
      +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
      7f7b2c
      +            VLOG_WARN_RL(&rl, "nexthop [%s] of the router policy with "
      7f7b2c
      +                         "the match [%s] do not belong to the same address "
      7f7b2c
      +                         "family as other next hops",
      7f7b2c
      +                         rule->nexthops[i], rule->match);
      7f7b2c
      +            return;
      7f7b2c
      +        }
      7f7b2c
      +    }
      7f7b2c
      +
      7f7b2c
      +    struct ds match = DS_EMPTY_INITIALIZER;
      7f7b2c
      +    struct ds actions = DS_EMPTY_INITIALIZER;
      7f7b2c
      +
      7f7b2c
      +    for (uint16_t i = 0; i < rule->n_nexthops; i++) {
      7f7b2c
      +        struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
      7f7b2c
      +             od, ports, rule->priority, rule->nexthops[i]);
      7f7b2c
      +        if (!out_port) {
      7f7b2c
      +            goto cleanup;
      7f7b2c
      +        }
      7f7b2c
      +
      7f7b2c
      +        const char *lrp_addr_s =
      7f7b2c
      +            find_lrp_member_ip(out_port, rule->nexthops[i]);
      7f7b2c
      +        if (!lrp_addr_s) {
      7f7b2c
      +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
      7f7b2c
      +            VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy "
      7f7b2c
      +                            " priority %"PRId64" nexthop %s",
      7f7b2c
      +                            rule->priority, rule->nexthops[i]);
      7f7b2c
      +            goto cleanup;
      7f7b2c
      +        }
      7f7b2c
      +
      7f7b2c
      +        ds_clear(&actions);
      7f7b2c
      +        uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
      7f7b2c
      +        if (pkt_mark) {
      7f7b2c
      +            ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
      7f7b2c
      +        }
      7f7b2c
      +
      7f7b2c
      +        bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false;
      7f7b2c
      +
      7f7b2c
      +        ds_put_format(&actions, "%s = %s; "
      7f7b2c
      +                      "%s = %s; "
      7f7b2c
      +                      "eth.src = %s; "
      7f7b2c
      +                      "outport = %s; "
      7f7b2c
      +                      "flags.loopback = 1; "
      7f7b2c
      +                      "next;",
      7f7b2c
      +                      is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
      7f7b2c
      +                      rule->nexthops[i],
      7f7b2c
      +                      is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
      7f7b2c
      +                      lrp_addr_s,
      7f7b2c
      +                      out_port->lrp_networks.ea_s,
      7f7b2c
      +                      out_port->json_key);
      7f7b2c
      +
      7f7b2c
      +        ds_clear(&match);
      7f7b2c
      +        ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && "
      7f7b2c
      +                      REG_ECMP_MEMBER_ID" == %"PRIu16,
      7f7b2c
      +                      ecmp_group_id, i + 1);
      7f7b2c
      +        ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY_ECMP,
      7f7b2c
      +                                100, ds_cstr(&match),
      7f7b2c
      +                                ds_cstr(&actions), &rule->header_);
      7f7b2c
      +    }
      7f7b2c
      +
      7f7b2c
      +    ds_clear(&actions);
      7f7b2c
      +    ds_put_format(&actions, "%s = %"PRIu16
      7f7b2c
      +                  "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
      7f7b2c
      +                  REG_ECMP_MEMBER_ID);
      7f7b2c
      +
      7f7b2c
      +    for (uint16_t i = 0; i < rule->n_nexthops; i++) {
      7f7b2c
      +        if (i > 0) {
      7f7b2c
      +            ds_put_cstr(&actions, ", ");
      7f7b2c
      +        }
      7f7b2c
      +
      7f7b2c
      +        ds_put_format(&actions, "%"PRIu16, i + 1);
      7f7b2c
      +    }
      7f7b2c
      +    ds_put_cstr(&actions, ");");
      7f7b2c
      +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
      7f7b2c
      +                            rule->priority, rule->match,
      7f7b2c
      +                            ds_cstr(&actions), &rule->header_);
      7f7b2c
      +
      7f7b2c
      +cleanup:
      7f7b2c
      +    ds_destroy(&match);
      7f7b2c
      +    ds_destroy(&actions);
      7f7b2c
      +}
      7f7b2c
      +
      7f7b2c
       struct parsed_route {
      7f7b2c
           struct ovs_list list_node;
      7f7b2c
           struct in6_addr prefix;
      7f7b2c
      @@ -10300,13 +10408,27 @@ build_ingress_policy_flows_for_lrouter(
      7f7b2c
           if (od->nbr) {
      7f7b2c
               /* This is a catch-all rule. It has the lowest priority (0)
      7f7b2c
                * does a match-all("1") and pass-through (next) */
      7f7b2c
      -        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;");
      7f7b2c
      +        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
      7f7b2c
      +                      REG_ECMP_GROUP_ID" = 0; next;");
      7f7b2c
      +        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
      7f7b2c
      +                      REG_ECMP_GROUP_ID" == 0", "next;");
      7f7b2c
       
      7f7b2c
               /* Convert routing policies to flows. */
      7f7b2c
      +        uint16_t ecmp_group_id = 1;
      7f7b2c
               for (int i = 0; i < od->nbr->n_policies; i++) {
      7f7b2c
                   const struct nbrec_logical_router_policy *rule
      7f7b2c
                       = od->nbr->policies[i];
      7f7b2c
      -            build_routing_policy_flow(lflows, od, ports, rule, &rule->header_);
      7f7b2c
      +            bool is_ecmp_reroute =
      7f7b2c
      +                (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
      7f7b2c
      +
      7f7b2c
      +            if (is_ecmp_reroute) {
      7f7b2c
      +                build_ecmp_routing_policy_flows(lflows, od, ports, rule,
      7f7b2c
      +                                                ecmp_group_id);
      7f7b2c
      +                ecmp_group_id++;
      7f7b2c
      +            } else {
      7f7b2c
      +                build_routing_policy_flow(lflows, od, ports, rule,
      7f7b2c
      +                                          &rule->header_);
      7f7b2c
      +            }
      7f7b2c
               }
      7f7b2c
           }
      7f7b2c
       }
      7f7b2c
      diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
      7f7b2c
      index af77dd138..b77a2308c 100644
      7f7b2c
      --- a/ovn-nb.ovsschema
      7f7b2c
      +++ b/ovn-nb.ovsschema
      7f7b2c
      @@ -1,7 +1,7 @@
      7f7b2c
       {
      7f7b2c
           "name": "OVN_Northbound",
      7f7b2c
      -    "version": "5.29.0",
      7f7b2c
      -    "cksum": "328602112 27064",
      7f7b2c
      +    "version": "5.30.0",
      7f7b2c
      +    "cksum": "3273824429 27172",
      7f7b2c
           "tables": {
      7f7b2c
               "NB_Global": {
      7f7b2c
                   "columns": {
      7f7b2c
      @@ -391,6 +391,8 @@
      7f7b2c
                           "key": {"type": "string",
      7f7b2c
                                   "enum": ["set", ["allow", "drop", "reroute"]]}}},
      7f7b2c
                       "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
      7f7b2c
      +                "nexthops": {"type": {
      7f7b2c
      +                    "key": "string", "min": 0, "max": "unlimited"}},
      7f7b2c
                       "options": {
      7f7b2c
                           "type": {"key": "string", "value": "string",
      7f7b2c
                                    "min": 0, "max": "unlimited"}},
      7f7b2c
      diff --git a/ovn-nb.xml b/ovn-nb.xml
      7f7b2c
      index e7a8d6833..0cf043790 100644
      7f7b2c
      --- a/ovn-nb.xml
      7f7b2c
      +++ b/ovn-nb.xml
      7f7b2c
      @@ -2723,18 +2723,34 @@
      7f7b2c
               
      7f7b2c
       
      7f7b2c
               
    • 7f7b2c
      -          reroute: Reroute packet to <ref column="nexthop"/>.
      7f7b2c
      +          reroute: Reroute packet to <ref column="nexthop"/> or
      7f7b2c
      +          <ref column="nexthops"/>.
      7f7b2c
               
      7f7b2c
             
      7f7b2c
           </column>
      7f7b2c
       
      7f7b2c
           <column name="nexthop">
      7f7b2c
      +      

      7f7b2c
      +        Note: This column is deprecated in favor of <ref column="nexthops"/>.
      7f7b2c
      +      

      7f7b2c
             

      7f7b2c
               Next-hop IP address for this route, which should be the IP
      7f7b2c
               address of a connected router port or the IP address of a logical port.
      7f7b2c
             

      7f7b2c
           </column>
      7f7b2c
       
      7f7b2c
      +    <column name="nexthops">
      7f7b2c
      +      

      7f7b2c
      +        Next-hop ECMP IP addresses for this route. Each IP in the list should
      7f7b2c
      +        be the IP address of a connected router port or the IP address of a
      7f7b2c
      +        logical port.
      7f7b2c
      +      

      7f7b2c
      +
      7f7b2c
      +      

      7f7b2c
      +        One IP from the list is selected as next hop.
      7f7b2c
      +      

      7f7b2c
      +    </column>
      7f7b2c
      +
      7f7b2c
           <column name="options" key="pkt_mark">
      7f7b2c
             

      7f7b2c
               Marks the packet with the value specified when the router policy
      7f7b2c
      diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
      7f7b2c
      index 50a4cae76..ce6c44db4 100644
      7f7b2c
      --- a/tests/ovn-northd.at
      7f7b2c
      +++ b/tests/ovn-northd.at
      7f7b2c
      @@ -2198,3 +2198,127 @@ dnl Number of common flows should be the same.
      7f7b2c
       check_row_count Logical_Flow ${n_flows_common} logical_dp_group=${dp_group_uuid}
      7f7b2c
       
      7f7b2c
       AT_CLEANUP
      7f7b2c
      +
      7f7b2c
      +AT_SETUP([ovn -- Router policies - ECMP reroute])
      7f7b2c
      +AT_KEYWORDS([router policies ecmp reroute])
      7f7b2c
      +ovn_start
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl ls-add sw0
      7f7b2c
      +check ovn-nbctl lsp-add sw0 sw0-port1
      7f7b2c
      +check ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3"
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl ls-add sw1
      7f7b2c
      +check ovn-nbctl lsp-add sw1 sw1-port1
      7f7b2c
      +check ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3"
      7f7b2c
      +
      7f7b2c
      +# Create a logical router and attach both logical switches
      7f7b2c
      +check ovn-nbctl lr-add lr0
      7f7b2c
      +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
      7f7b2c
      +check ovn-nbctl lsp-add sw0 sw0-lr0
      7f7b2c
      +check ovn-nbctl lsp-set-type sw0-lr0 router
      7f7b2c
      +check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01
      7f7b2c
      +check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
      7f7b2c
      +check ovn-nbctl lsp-add sw1 sw1-lr0
      7f7b2c
      +check ovn-nbctl lsp-set-type sw1-lr0 router
      7f7b2c
      +check ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02
      7f7b2c
      +check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr-sw1
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl ls-add public
      7f7b2c
      +check ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24
      7f7b2c
      +check ovn-nbctl lsp-add public public-lr0
      7f7b2c
      +check ovn-nbctl lsp-set-type public-lr0 router
      7f7b2c
      +check ovn-nbctl lsp-set-addresses public-lr0 router
      7f7b2c
      +check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl --wait=sb lr-policy-add lr0  10 "ip4.src == 10.0.0.3" reroute 172.168.0.101,172.168.0.102
      7f7b2c
      +
      7f7b2c
      +ovn-sbctl dump-flows lr0 > lr0flows3
      7f7b2c
      +AT_CAPTURE_FILE([lr0flows3])
      7f7b2c
      +
      7f7b2c
      +AT_CHECK([grep "lr_in_policy" lr0flows3 | sort], [0], [dnl
      7f7b2c
      +  table=12(lr_in_policy       ), priority=0    , match=(1), action=(reg8[[0..15]] = 0; next;)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=150  , match=(reg8[[0..15]] == 0), action=(next;)
      7f7b2c
      +])
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl --wait=sb lr-policy-add lr0  10 "ip4.src == 10.0.0.4" reroute 172.168.0.101,172.168.0.102,172.168.0.103
      7f7b2c
      +ovn-sbctl dump-flows lr0 > lr0flows3
      7f7b2c
      +AT_CAPTURE_FILE([lr0flows3])
      7f7b2c
      +
      7f7b2c
      +AT_CHECK([grep "lr_in_policy" lr0flows3 |  \
      7f7b2c
      +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
      7f7b2c
      +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
      7f7b2c
      +  table=12(lr_in_policy       ), priority=0    , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2);)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2, 3);)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=150  , match=(reg8[[0..15]] == <cleared>), action=(next;)
      7f7b2c
      +])
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl --wait=sb lr-policy-add lr0  10 "ip4.src == 10.0.0.5" reroute 172.168.0.110
      7f7b2c
      +ovn-sbctl dump-flows lr0 > lr0flows3
      7f7b2c
      +AT_CAPTURE_FILE([lr0flows3])
      7f7b2c
      +
      7f7b2c
      +AT_CHECK([grep "lr_in_policy" lr0flows3 |  \
      7f7b2c
      +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
      7f7b2c
      +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
      7f7b2c
      +  table=12(lr_in_policy       ), priority=0    , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2);)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2, 3);)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=150  , match=(reg8[[0..15]] == <cleared>), action=(next;)
      7f7b2c
      +])
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl --wait=sb lr-policy-del lr0  10 "ip4.src == 10.0.0.3"
      7f7b2c
      +ovn-sbctl dump-flows lr0 > lr0flows3
      7f7b2c
      +AT_CAPTURE_FILE([lr0flows3])
      7f7b2c
      +
      7f7b2c
      +AT_CHECK([grep "lr_in_policy" lr0flows3 |  \
      7f7b2c
      +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
      7f7b2c
      +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
      7f7b2c
      +  table=12(lr_in_policy       ), priority=0    , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = <cleared>; reg8[[16..31]] = select(1, 2, 3);)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=100  , match=(reg8[[0..15]] == <cleared> && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=150  , match=(reg8[[0..15]] == <cleared>), action=(next;)
      7f7b2c
      +])
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl --wait=sb lr-policy-del lr0  10 "ip4.src == 10.0.0.4"
      7f7b2c
      +ovn-sbctl dump-flows lr0 > lr0flows3
      7f7b2c
      +AT_CAPTURE_FILE([lr0flows3])
      7f7b2c
      +
      7f7b2c
      +AT_CHECK([grep "lr_in_policy" lr0flows3 |  \
      7f7b2c
      +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
      7f7b2c
      +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
      7f7b2c
      +  table=12(lr_in_policy       ), priority=0    , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=12(lr_in_policy       ), priority=10   , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=150  , match=(reg8[[0..15]] == <cleared>), action=(next;)
      7f7b2c
      +])
      7f7b2c
      +
      7f7b2c
      +check ovn-nbctl --wait=sb add logical_router_policy . nexthops "2000\:\:b"
      7f7b2c
      +ovn-sbctl dump-flows lr0 > lr0flows3
      7f7b2c
      +AT_CAPTURE_FILE([lr0flows3])
      7f7b2c
      +
      7f7b2c
      +AT_CHECK([grep "lr_in_policy" lr0flows3 |  \
      7f7b2c
      +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = <cleared>/' | \
      7f7b2c
      +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], [dnl
      7f7b2c
      +  table=12(lr_in_policy       ), priority=0    , match=(1), action=(reg8[[0..15]] = <cleared>; next;)
      7f7b2c
      +  table=13(lr_in_policy_ecmp  ), priority=150  , match=(reg8[[0..15]] == <cleared>), action=(next;)
      7f7b2c
      +])
      7f7b2c
      +
      7f7b2c
      +AT_CLEANUP
      7f7b2c
      diff --git a/tests/ovn.at b/tests/ovn.at
      7f7b2c
      index 2e0bc9c53..66088a7f5 100644
      7f7b2c
      --- a/tests/ovn.at
      7f7b2c
      +++ b/tests/ovn.at
      7f7b2c
      @@ -21156,31 +21156,31 @@ AT_CHECK([
      7f7b2c
       
      7f7b2c
       AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
      7f7b2c
         table=12(lr_in_policy       ), priority=1001 , dnl
      7f7b2c
      -match=(ip6), action=(pkt.mark = 4294967295; next;)
      7f7b2c
      +match=(ip6), action=(pkt.mark = 4294967295; reg8[[0..15]] = 0; next;)
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
       ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=-1
      7f7b2c
       AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
      7f7b2c
         table=12(lr_in_policy       ), priority=1001 , dnl
      7f7b2c
      -match=(ip6), action=(next;)
      7f7b2c
      +match=(ip6), action=(reg8[[0..15]] = 0; next;)
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
       ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=2147483648
      7f7b2c
       AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
      7f7b2c
         table=12(lr_in_policy       ), priority=1001 , dnl
      7f7b2c
      -match=(ip6), action=(pkt.mark = 2147483648; next;)
      7f7b2c
      +match=(ip6), action=(pkt.mark = 2147483648; reg8[[0..15]] = 0; next;)
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
       ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=foo
      7f7b2c
       AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
      7f7b2c
         table=12(lr_in_policy       ), priority=1001 , dnl
      7f7b2c
      -match=(ip6), action=(next;)
      7f7b2c
      +match=(ip6), action=(reg8[[0..15]] = 0; next;)
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
       ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=4294967296
      7f7b2c
       AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl
      7f7b2c
         table=12(lr_in_policy       ), priority=1001 , dnl
      7f7b2c
      -match=(ip6), action=(next;)
      7f7b2c
      +match=(ip6), action=(reg8[[0..15]] = 0; next;)
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
       OVN_CLEANUP([hv1])
      7f7b2c
      @@ -21759,7 +21759,7 @@ AT_CHECK([
      7f7b2c
           grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c)
      7f7b2c
       ])
      7f7b2c
       AT_CHECK([
      7f7b2c
      -    test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=21 | \
      7f7b2c
      +    test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=22 | \
      7f7b2c
           grep "priority=200" | \
      7f7b2c
           grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c)
      7f7b2c
       ])
      7f7b2c
      @@ -21770,7 +21770,7 @@ AT_CHECK([
      7f7b2c
           grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c)
      7f7b2c
       ])
      7f7b2c
       AT_CHECK([
      7f7b2c
      -    test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=21 | \
      7f7b2c
      +    test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=22 | \
      7f7b2c
           grep "priority=200" | \
      7f7b2c
           grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c)
      7f7b2c
       ])
      7f7b2c
      @@ -22208,7 +22208,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep "actions=controller" | grep
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
       # The packet should've been dropped in the lr_in_arp_resolve stage.
      7f7b2c
      -AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=21, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl
      7f7b2c
      +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=22, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl
      7f7b2c
       1
      7f7b2c
       ])
      7f7b2c
       
      7f7b2c
      diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
      7f7b2c
      index e5a35f307..e6fec9980 100644
      7f7b2c
      --- a/utilities/ovn-nbctl.8.xml
      7f7b2c
      +++ b/utilities/ovn-nbctl.8.xml
      7f7b2c
      @@ -739,7 +739,7 @@
      7f7b2c
           
      7f7b2c
             
      [--may-exist]lr-policy-add
      7f7b2c
                 router priority match
      7f7b2c
      -          action [nexthop]
      7f7b2c
      +          action [nexthop[,nexthop,...]]
      7f7b2c
                 [options key=value]] 
      7f7b2c
             
      7f7b2c
               

      7f7b2c
      @@ -748,10 +748,12 @@
      7f7b2c
                 are similar to OVN ACLs, but exist on the logical-router. Reroute
      7f7b2c
                 policies are needed for service-insertion and service-chaining.
      7f7b2c
                 nexthop is an optional parameter. It needs to be provided
      7f7b2c
      -          only when action is reroute. A policy is
      7f7b2c
      -          uniquely identified by priority and match.
      7f7b2c
      -          Multiple policies can have the same priority.
      7f7b2c
      -          options sets the router policy options as key-value pair.
      7f7b2c
      +          only when action is reroute. Multiple
      7f7b2c
      +          nexthops can be specified for ECMP routing.
      7f7b2c
      +          A policy is uniquely identified by priority and
      7f7b2c
      +          match. Multiple policies can have the same
      7f7b2c
      +          priority. options sets the router policy
      7f7b2c
      +          options as key-value pairs.
      7f7b2c
                 The supported option is : pkt_mark.
      7f7b2c
               

      7f7b2c
       
      7f7b2c
      diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
      7f7b2c
      index 835161f25..94e7eedeb 100644
      7f7b2c
      --- a/utilities/ovn-nbctl.c
      7f7b2c
      +++ b/utilities/ovn-nbctl.c
      7f7b2c
      @@ -766,7 +766,7 @@ Route commands:\n\
      7f7b2c
         lr-route-list ROUTER      print routes for ROUTER\n\
      7f7b2c
       \n\
      7f7b2c
       Policy commands:\n\
      7f7b2c
      -  lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \
      7f7b2c
      +  lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP[,NEXTHOP,...]] \
      7f7b2c
       [OPTIONS KEY=VALUE ...] \n\
      7f7b2c
                                   add a policy to router\n\
      7f7b2c
         lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\
      7f7b2c
      @@ -3634,7 +3634,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
      7f7b2c
               return;
      7f7b2c
           }
      7f7b2c
           const char *action = ctx->argv[4];
      7f7b2c
      -    char *next_hop = NULL;
      7f7b2c
      +    size_t n_nexthops = 0;
      7f7b2c
      +    char **nexthops = NULL;
      7f7b2c
       
      7f7b2c
           bool reroute = false;
      7f7b2c
           /* Validate action. */
      7f7b2c
      @@ -3654,7 +3655,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
      7f7b2c
           /* Check if same routing policy already exists.
      7f7b2c
            * A policy is uniquely identified by priority and match */
      7f7b2c
           bool may_exist = !!shash_find(&ctx->options, "--may-exist");
      7f7b2c
      -    for (int i = 0; i < lr->n_policies; i++) {
      7f7b2c
      +    size_t i;
      7f7b2c
      +    for (i = 0; i < lr->n_policies; i++) {
      7f7b2c
               const struct nbrec_logical_router_policy *policy = lr->policies[i];
      7f7b2c
               if (policy->priority == priority &&
      7f7b2c
                   !strcmp(policy->match, ctx->argv[3])) {
      7f7b2c
      @@ -3665,12 +3667,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
      7f7b2c
                   return;
      7f7b2c
               }
      7f7b2c
           }
      7f7b2c
      +
      7f7b2c
           if (reroute) {
      7f7b2c
      -        next_hop = normalize_prefix_str(ctx->argv[5]);
      7f7b2c
      -        if (!next_hop) {
      7f7b2c
      -            ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]);
      7f7b2c
      -            return;
      7f7b2c
      +        char *nexthops_arg = xstrdup(ctx->argv[5]);
      7f7b2c
      +        char *save_ptr, *next_hop, *token;
      7f7b2c
      +
      7f7b2c
      +        n_nexthops = 0;
      7f7b2c
      +        size_t n_allocs = 0;
      7f7b2c
      +
      7f7b2c
      +        bool nexthops_is_ipv4 = true;
      7f7b2c
      +        for (token = strtok_r(nexthops_arg, ",", &save_ptr);
      7f7b2c
      +            token != NULL; token = strtok_r(NULL, ",", &save_ptr)) {
      7f7b2c
      +            next_hop = normalize_addr_str(token);
      7f7b2c
      +
      7f7b2c
      +            if (!next_hop) {
      7f7b2c
      +                ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]);
      7f7b2c
      +                free(nexthops_arg);
      7f7b2c
      +                for (i = 0; i < n_nexthops; i++) {
      7f7b2c
      +                    free(nexthops[i]);
      7f7b2c
      +                }
      7f7b2c
      +                free(nexthops);
      7f7b2c
      +                return;
      7f7b2c
      +            }
      7f7b2c
      +            if (n_nexthops == n_allocs) {
      7f7b2c
      +                nexthops = x2nrealloc(nexthops, &n_allocs, sizeof *nexthops);
      7f7b2c
      +            }
      7f7b2c
      +
      7f7b2c
      +            bool is_ipv4 = strchr(next_hop, '.') ? true : false;
      7f7b2c
      +            if (n_nexthops == 0) {
      7f7b2c
      +                nexthops_is_ipv4 = is_ipv4;
      7f7b2c
      +            }
      7f7b2c
      +
      7f7b2c
      +            if (is_ipv4 != nexthops_is_ipv4) {
      7f7b2c
      +                ctl_error(ctx, "bad next hops argument, not in the same "
      7f7b2c
      +                          "addr family : %s", ctx->argv[5]);
      7f7b2c
      +                free(nexthops_arg);
      7f7b2c
      +                free(next_hop);
      7f7b2c
      +                for (i = 0; i < n_nexthops; i++) {
      7f7b2c
      +                    free(nexthops[i]);
      7f7b2c
      +                }
      7f7b2c
      +                free(nexthops);
      7f7b2c
      +                return;
      7f7b2c
      +            }
      7f7b2c
      +            nexthops[n_nexthops] = next_hop;
      7f7b2c
      +            n_nexthops++;
      7f7b2c
               }
      7f7b2c
      +
      7f7b2c
      +        free(nexthops_arg);
      7f7b2c
           }
      7f7b2c
       
      7f7b2c
           struct nbrec_logical_router_policy *policy;
      7f7b2c
      @@ -3679,12 +3722,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
      7f7b2c
           nbrec_logical_router_policy_set_match(policy, ctx->argv[3]);
      7f7b2c
           nbrec_logical_router_policy_set_action(policy, action);
      7f7b2c
           if (reroute) {
      7f7b2c
      -        nbrec_logical_router_policy_set_nexthop(policy, next_hop);
      7f7b2c
      +        nbrec_logical_router_policy_set_nexthops(
      7f7b2c
      +            policy, (const char **)nexthops, n_nexthops);
      7f7b2c
           }
      7f7b2c
       
      7f7b2c
           /* Parse the options. */
      7f7b2c
           struct smap options = SMAP_INITIALIZER(&options);
      7f7b2c
      -    for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) {
      7f7b2c
      +    for (i = reroute ? 6 : 5; i < ctx->argc; i++) {
      7f7b2c
               char *key, *value;
      7f7b2c
               value = xstrdup(ctx->argv[i]);
      7f7b2c
               key = strsep(&value, "=");
      7f7b2c
      @@ -3694,7 +3738,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
      7f7b2c
                   ctl_error(ctx, "No value specified for the option : %s", key);
      7f7b2c
                   smap_destroy(&options);
      7f7b2c
                   free(key);
      7f7b2c
      -            free(next_hop);
      7f7b2c
      +            for (i = 0; i < n_nexthops; i++) {
      7f7b2c
      +                free(nexthops[i]);
      7f7b2c
      +            }
      7f7b2c
      +            free(nexthops);
      7f7b2c
                   return;
      7f7b2c
               }
      7f7b2c
               free(key);
      7f7b2c
      @@ -3703,9 +3750,11 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
      7f7b2c
           smap_destroy(&options);
      7f7b2c
       
      7f7b2c
           nbrec_logical_router_update_policies_addvalue(lr, policy);
      7f7b2c
      -    if (next_hop != NULL) {
      7f7b2c
      -        free(next_hop);
      7f7b2c
      +
      7f7b2c
      +    for (i = 0; i < n_nexthops; i++) {
      7f7b2c
      +        free(nexthops[i]);
      7f7b2c
           }
      7f7b2c
      +    free(nexthops);
      7f7b2c
       }
      7f7b2c
       
      7f7b2c
       static void
      7f7b2c
      -- 
      7f7b2c
      2.28.0
      7f7b2c