diff --git a/SOURCES/ovn-20.12.0.patch b/SOURCES/ovn-20.12.0.patch
new file mode 100644
index 0000000..b8ea6d5
--- /dev/null
+++ b/SOURCES/ovn-20.12.0.patch
@@ -0,0 +1,22089 @@
+diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
+index 0e9f87fa8..731dcacb9 100755
+--- a/.ci/linux-build.sh
++++ b/.ci/linux-build.sh
+@@ -9,8 +9,7 @@ EXTRA_OPTS="--enable-Werror"
+
+ function configure_ovs()
+ {
+- git clone https://github.com/openvswitch/ovs.git ovs_src
+- pushd ovs_src
++ pushd ovs
+ ./boot.sh && ./configure $* || { cat config.log; exit 1; }
+ make -j4 || { cat config.log; exit 1; }
+ popd
+@@ -19,7 +18,7 @@ function configure_ovs()
+ function configure_ovn()
+ {
+ configure_ovs $*
+- ./boot.sh && ./configure --with-ovs-source=$PWD/ovs_src $* || \
++ ./boot.sh && ./configure $* || \
+ { cat config.log; exit 1; }
+ }
+
+@@ -43,7 +42,7 @@ if [ "$TESTSUITE" ]; then
+ # Now we only need to prepare the Makefile without sparse-wrapped CC.
+ configure_ovn
+
+- export DISTCHECK_CONFIGURE_FLAGS="$OPTS --with-ovs-source=$PWD/ovs_src"
++ export DISTCHECK_CONFIGURE_FLAGS="$OPTS"
+ if ! make distcheck -j4 TESTSUITEFLAGS="-j4" RECHECK=yes; then
+ # testsuite.log is necessary for debugging.
+ cat */_build/sub/tests/testsuite.log
+diff --git a/.ci/osx-build.sh b/.ci/osx-build.sh
+index 6617f0b9d..4b78b66dd 100755
+--- a/.ci/osx-build.sh
++++ b/.ci/osx-build.sh
+@@ -7,8 +7,7 @@ EXTRA_OPTS=""
+
+ function configure_ovs()
+ {
+- git clone https://github.com/openvswitch/ovs.git ovs_src
+- pushd ovs_src
++ pushd ovs
+ ./boot.sh && ./configure $*
+ make -j4 || { cat config.log; exit 1; }
+ popd
+@@ -17,7 +16,7 @@ function configure_ovs()
+ function configure_ovn()
+ {
+ configure_ovs $*
+- ./boot.sh && ./configure $* --with-ovs-source=$PWD/ovs_src
++ ./boot.sh && ./configure $*
+ }
+
+ configure_ovn $EXTRA_OPTS $*
+@@ -32,7 +31,7 @@ if ! "$@"; then
+ exit 1
+ fi
+ if [ "$TESTSUITE" ] && [ "$CC" != "clang" ]; then
+- export DISTCHECK_CONFIGURE_FLAGS="$EXTRA_OPTS --with-ovs-source=$PWD/ovs_src"
++ export DISTCHECK_CONFIGURE_FLAGS="$EXTRA_OPTS"
+ if ! make distcheck RECHECK=yes; then
+ # testsuite.log is necessary for debugging.
+ cat */_build/sub/tests/testsuite.log
+diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
+index 7be75ca36..d825e257c 100644
+--- a/.github/workflows/test.yml
++++ b/.github/workflows/test.yml
+@@ -48,6 +48,8 @@ jobs:
+ steps:
+ - name: checkout
+ uses: actions/checkout@v2
++ with:
++ submodules: recursive
+
+ - name: install required dependencies
+ run: sudo apt install -y ${{ env.dependencies }}
+@@ -99,6 +101,8 @@ jobs:
+ steps:
+ - name: checkout
+ uses: actions/checkout@v2
++ with:
++ submodules: recursive
+ - name: install dependencies
+ run: brew install automake libtool
+ - name: prepare
+diff --git a/.gitignore b/.gitignore
+index 7ca9b3859..68333384e 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -94,3 +94,5 @@ testsuite.tmp.orig
+ /.venv
+ /cxx-check
+ /*.ovsschema.stamp
++/compile_ovn.sh
++
+diff --git a/.gitmodules b/.gitmodules
+new file mode 100644
+index 000000000..e083f6bde
+--- /dev/null
++++ b/.gitmodules
+@@ -0,0 +1,3 @@
++[submodule "ovs"]
++ path = ovs
++ url = https://github.com/openvswitch/ovs
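With Open vSwitch now tracked as a git submodule pinned to the minimum recommended version, a fresh checkout has to initialize that submodule before the build scripts above can simply `pushd ovs`. A minimal sketch of the workflow, assuming the upstream ovn-org/ovn clone URL:

    # Clone OVN together with the pinned OVS submodule ...
    git clone --recurse-submodules https://github.com/ovn-org/ovn.git
    cd ovn
    # ... or fetch the submodule on demand in an existing checkout.
    git submodule update --init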
+diff --git a/.gitreview b/.gitreview
+new file mode 100644
+index 000000000..27e8042ac
+--- /dev/null
++++ b/.gitreview
+@@ -0,0 +1,6 @@
++[gerrit]
++host=code.engineering.redhat.com
++port=22
++project=ovn.git
++defaultbranch=ovn2.13
++
+diff --git a/AUTHORS.rst b/AUTHORS.rst
+index 5d926c11f..29c2c011c 100644
+--- a/AUTHORS.rst
++++ b/AUTHORS.rst
+@@ -155,6 +155,7 @@ Geoffrey Wossum gwossum@acm.org
+ Gianluca Merlo gianluca.merlo@gmail.com
+ Giuseppe Lettieri g.lettieri@iet.unipi.it
+ Glen Gibb grg@stanford.edu
++Gongming Chen gmingchen@tencent.com
+ Guoshuai Li ligs@dtdream.com
+ Guolin Yang gyang@vmware.com
+ Guru Chaitanya Perakam gperakam@Brocade.com
+diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst
+index 65b1f4a40..cee99c63d 100644
+--- a/Documentation/intro/install/general.rst
++++ b/Documentation/intro/install/general.rst
+@@ -66,6 +66,10 @@ To compile the userspace programs in the OVN distribution, you will
+ need the following software:
+
+ - Open vSwitch (https://docs.openvswitch.org/en/latest/intro/install/).
++ Open vSwitch is included as a submodule in the OVN source code. It is
++ kept at the minimum recommended version for OVN to operate optimally.
++ See below for instructions about how to use a different OVS source
++ location.
+
+ - GNU make
+
+@@ -140,27 +144,44 @@ Bootstrapping
+ -------------
+
+ This step is not needed if you have downloaded a released tarball. If
+-you pulled the sources directly from an Open vSwitch Git tree or got a
+-Git tree snapshot, then run boot.sh in the top source directory to build
++you pulled the sources directly from an OVN Git tree or got a Git tree
++snapshot, then run boot.sh in the top source directory to build
+ the "configure" script::
+
+ $ ./boot.sh
+
+-Before configuring OVN, clone, configure and build Open vSwitch.
++Before configuring OVN, build Open vSwitch. The easiest way to do this
++is to use the included OVS submodule in the OVN source::
++
++ $ git submodule update --init
++ $ cd ovs
++ $ ./boot.sh
++ $ ./configure
++ $ make
++ $ cd ..
++
++It is not required to use the included OVS submodule; however the OVS
++submodule is guaranteed to be the minimum recommended version of OVS
++to ensure OVN's optimal operation. If you wish to use OVS source code
++from a different location on the file system, then be sure to configure
++and build OVS before building OVN.
+
+ .. _general-configuring:
+
+ Configuring
+ -----------
+
+-Configure the package by running the configure script. You need to
+-invoke configure with atleast the argument --with-ovs-source.
+-For example::
++Then configure the package by running the configure script::
++
++ $ ./configure
++
++If your OVS source directory is not the included OVS submodule, specify the
++location of the OVS source code using --with-ovs-source::
+
+ $ ./configure --with-ovs-source=/path/to/ovs/source
+
+-If you have built Open vSwitch in a separate directory, then you
+-need to provide that path in the option - --with-ovs-build.
++If you have built Open vSwitch in a separate directory from its source
++code, then you need to provide that path in the option - --with-ovs-build.
+
+ By default all files are installed under ``/usr/local``. OVN expects to find
+ its database in ``/usr/local/etc/ovn`` by default.
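Taken together, the revised instructions reduce to the sketch below; the paths are placeholders, and --with-ovs-build is only required when OVS was built in a directory separate from its source tree:

    # Build Open vSwitch first (the bundled submodule or any other tree).
    cd /path/to/ovs
    ./boot.sh && ./configure && make

    # Then configure and build OVN against that tree.
    cd /path/to/ovn
    ./boot.sh
    ./configure --with-ovs-source=/path/to/ovs
    make

Add --with-ovs-build=/path/to/ovs/build to the configure line if the OVS objects live outside the OVS source directory.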
+diff --git a/Makefile.am b/Makefile.am
+index 7ce3d27e4..04a6d7c63 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -48,6 +48,8 @@ AM_CFLAGS = -Wstrict-prototypes
+ AM_CFLAGS += $(WARNING_FLAGS)
+ AM_CFLAGS += $(OVS_CFLAGS)
+
++AM_DISTCHECK_CONFIGURE_FLAGS = --with-ovs-source=$(PWD)/ovs
++
+ if NDEBUG
+ AM_CPPFLAGS += -DNDEBUG
+ AM_CFLAGS += -fomit-frame-pointer
+@@ -105,7 +107,9 @@ EXTRA_DIST = \
+ ovn-ic-nb.ovsschema \
+ ovn-ic-nb.xml \
+ ovn-ic-sb.ovsschema \
+- ovn-ic-sb.xml
++ ovn-ic-sb.xml \
++ .gitreview \
++ compile_ovn.sh
+ bin_PROGRAMS =
+ sbin_PROGRAMS =
+ bin_SCRIPTS =
+@@ -157,6 +161,7 @@ noinst_HEADERS += $(EXTRA_DIST)
+
+ ro_c = echo '/* -*- mode: c; buffer-read-only: t -*- */'
+ ro_shell = printf '\043 Generated automatically -- do not modify! -*- buffer-read-only: t -*-\n'
++submodules = $(shell grep 'path =' $(srcdir)/.gitmodules | sed -E 's/[\t ]*path =\s*(.*)/\1/g' | xargs)
+
+ SUFFIXES += .in
+ .in:
+@@ -216,6 +221,8 @@ dist-hook-git: distfiles
+ @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1; then \
+ (cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' | \
+ grep -v '\.gitattributes$$' | \
++ grep -v '\.gitmodules$$' | \
++ grep -v "$(submodules)" | \
+ LC_ALL=C sort -u > all-gitfiles; \
+ LC_ALL=C comm -1 -3 distfiles all-gitfiles > missing-distfiles; \
+ if test -s missing-distfiles; then \
+@@ -247,8 +254,8 @@ ALL_LOCAL += config-h-check
+ config-h-check:
+ @cd $(srcdir); \
+ if test -e .git && (git --version) >/dev/null 2>&1 && \
+-	    git --no-pager grep -L '#include
+diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
+--- a/northd/ovn-northd.8.xml
++++ b/northd/ovn-northd.8.xml
++ This table looks up the MAC learning table of the logical switch
++ datapath to check if the
++ For each such logical port p whose port security
++ is disabled and 'unknown' address set following flow
++ is added.
++
++ This table learns the MAC addresses seen on the logical ports
++ whose port security is disabled and 'unknown' address set
++ if the
++ For each such logical port p whose port security
++ is disabled and 'unknown' address set following flow
++ is added.
++
+ This table prepares flows for possible stateful ACL processing in
+@@ -332,7 +398,7 @@
+ db="OVN_Northbound"/> table.
+
+ This table prepares flows for possible stateful load balancing processing
+@@ -399,7 +465,7 @@
+ logical router datapath to logical switch datapath.
+
+ This table prepares flows for all possible stateful processing
+@@ -410,12 +476,13 @@
+
+ This table consists of logical flows that set hints
+ (
+ Logical flows in this table closely reproduce those in the
+@@ -518,8 +585,9 @@
+ flows with the
+
+ Logical flows in this table closely reproduce those in the
+@@ -619,7 +687,7 @@
+
+
+
+-
+ Logical flows in this table closely reproduce those in the
+@@ -641,7 +709,7 @@
+
+
+
+-
+ It contains a priority-0 flow that simply moves traffic to the next
+@@ -667,7 +735,7 @@
+ connection.)
+
+ This table implements ARP/ND responder in a logical switch for known
+@@ -1069,7 +1164,7 @@ output;
+
+
+
+-
+ This table adds the DHCPv4 options to a DHCPv4 packet from the
+@@ -1130,7 +1225,7 @@ next;
+
+
+
+-
+ This table implements DHCP responder for the DHCP replies generated by
+@@ -1211,7 +1306,7 @@ output;
+
+
+
+-
+ This table looks up and resolves the DNS names to the corresponding
+@@ -1240,7 +1335,7 @@ reg0[4] = dns_lookup(); next;
+
+
+
+-
+ This table implements DNS responder for the DNS replies generated by
+@@ -1275,7 +1370,7 @@ output;
+
+
+
+-
+ Traffic from the
+ This table implements switching behavior. It contains these logical
+@@ -1481,12 +1576,58 @@ output;
+
+
+
++ This table handles the packets whose destination was not found or
++ and looked up in the MAC learning table of the logical switch
++ datapath. It contains the following flows.
++
++ If the logical switch has logical ports with 'unknown' addresses set,
++ then the below logical flow is added
++
++ If the logical switch has no logical ports with 'unknown' address
++ set, then the below logical flow is added
++ Ingress Table 3:
++ from-lport
Pre-ACLsIngress Table 3: Lookup MAC address learning table
++
++ port-mac
pair is present
++ or not. MAC is learnt only for logical switch VIF ports whose
++ port security is disabled and 'unknown' address set.
++
++
++
++
++
++ inport == p
and action
++ reg0[11] = lookup_fdb(inport, eth.src); next;
++ Ingress Table 4: Learn MAC of 'unknown' ports.
++
++ lookup_fdb
action returned false in the
++ previous table.
++
++
++
++
++
++ inport == p && reg0[11] == 0
and
++ action put_fdb(inport, eth.src); next;
which stores
++ the port-mac
in the mac learning table of the
++ logical switch datapath and advances the packet to the next table.
++ Ingress Table 5:
+
+ from-lport
Pre-ACLsIngress Table 4: Pre-LB
++ Ingress Table 6: Pre-LB
+
+ Ingress Table 5: Pre-stateful
++ Ingress Table 7: Pre-stateful
+
+ ct_next;
action.
+ Ingress Table 6:
++ from-lport
ACL hintsIngress Table 8:
+
+ from-lport
ACL hintsreg0
bits) to be used in the next stage, in the ACL
+- processing table. Multiple hints can be set for the same packet.
++ processing table, if stateful ACLs or load balancers are configured.
++ Multiple hints can be set for the same packet.
+ The possible hints are:
+
+@@ -489,7 +556,7 @@
+
+
+
+- Ingress table 7:
++ from-lport
ACLsIngress table 9:
+
+ from-lport
ACLstcp_reset { output <-> inport;
+ next(pipeline=egress,table=5);}
+- action for TCP connections and icmp4/icmp6
action
+- for UDP connections.
++ action for TCP connections,icmp4/icmp6
action
++ for UDP connections, and sctp_abort {output <-> inport;
++ next(pipeline=egress,table=5);}
action for SCTP associations.
+
+ drop;
for new or untracked
+@@ -597,7 +665,7 @@
+ Ingress Table 8:
++ from-lport
QoS MarkingIngress Table 10:
+
+ from-lport
QoS MarkingIngress Table 9:
++ from-lport
QoS MeterIngress Table 11:
+
+ from-lport
QoS MeterIngress Table 10: LB
++ Ingress Table 12: LB
+
+ Ingress Table 11: Stateful
++ Ingress Table 13: Stateful
+
+
+
+
+- OVN_Southbound
db is either online
or
+- empty.
++ empty. For IPv4 traffic the flow also loads the original destination
++ IP and transport port in registers reg1
and
++ reg2
. For IPv6 traffic the flow also loads the original
++ destination IP and transport port in registers xxreg1
and
++ reg2
.
+
+ ct_lb(args)
, where args contains comma
+ separated IP addresses of the same address family as VIP.
++ For IPv4 traffic the flow also loads the original destination
++ IP and transport port in registers reg1
and
++ reg2
. For IPv6 traffic the flow also loads the original
++ destination IP and transport port in registers xxreg1
and
++ reg2
.
++ --reject
option and
++ it has no active backends, a TCP reset segment (for tcp) or an ICMP
++ port unreachable packet (for all other kind of traffic) will be sent
++ whenever an incoming packet is received for this load-balancer.
++ Please note using --reject
option will disable
++ empty_lb SB controller event for this load balancer.
+ ct_commit; next;
action based on a hint provided by
+ the previous tables (with a match for reg0[1] == 1
).
+ ct_lb;
as the action based on a hint provided by the
+- previous tables (with a match for reg0[2] == 1
).
++ previous tables (with a match for reg0[2] == 1
and
++ on supported load balancer protocols and address families).
++ For IPv4 traffic the flows also load the original destination
++ IP and transport port in registers reg1
and
++ reg2
. For IPv6 traffic the flows also load the original
++ destination IP and transport port in registers xxreg1
and
++ reg2
.
+ Ingress Table 12: Pre-Hairpin
++ Ingress Table 14: Pre-Hairpin
+
+
+
+- ip && ct.trk&& ct.dnat
to check if the
++ priority-100 flow is added with the match
++ ip && ct.trk
to check if the
+ packet needs to be hairpinned (if after load balancing the destination
+- IP matches the source IP) or not by executing the action
+- reg0[6] = chk_lb_hairpin();
and advances the packet to
+- the next table.
+- ip
to check if
+- the packet is a reply for a hairpinned connection or not by executing
+- the action reg0[6] = chk_lb_hairpin_reply();
and advances
+- the packet to the next table.
++ IP matches the source IP) or not by executing the actions
++ reg0[6] = chk_lb_hairpin();
and
++ reg0[12] = chk_lb_hairpin_reply();
and advances the packet
++ to the next table.
+ Ingress Table 13: Nat-Hairpin
++ Ingress Table 15: Nat-Hairpin
+
+
+
+- ip && (ct.new || ct.est) && ct.trk &&
+- ct.dnat && reg0[6] == 1
which hairpins the traffic by
++ priority-100 flow is added with the match
++ ip && ct.new && ct.trk &&
++ reg0[6] == 1
which hairpins the traffic by
+ NATting source IP to the load balancer VIP by executing the action
+ ct_snat_to_vip
and advances the packet to the next table.
+ ip && reg0[6] == 1
which matches on the replies
++ priority-100 flow is added with the match
++ ip && ct.est && ct.trk &&
++ reg0[6] == 1
which hairpins the traffic by
++ NATting source IP to the load balancer VIP by executing the action
++ ct_snat
and advances the packet to the next table.
++ ip && reg0[12] == 1
which matches on the replies
+ of hairpinned traffic (i.e., destination IP is VIP,
+ source IP is the backend IP and source L4 port is backend port for L4
+ load balancers) and executes ct_snat
and advances the
+@@ -766,7 +861,7 @@
+ Ingress Table 14: Hairpin
++ Ingress Table 16: Hairpin
+
+
+
+- Ingress Table 15: ARP/ND responder
++ Ingress Table 17: ARP/ND responder
+
+ Ingress Table 16: DHCP option processing
++ Ingress Table 18: DHCP option processing
+
+ Ingress Table 17: DHCP responses
++ Ingress Table 19: DHCP responses
+
+ Ingress Table 18 DNS Lookup
++ Ingress Table 20 DNS Lookup
+
+ Ingress Table 19 DNS Responses
++ Ingress Table 21 DNS Responses
+
+ Ingress table 20 External ports
++ Ingress table 22 External ports
+
+ external
logical ports enter the ingress
+@@ -1318,7 +1413,7 @@ output;
+
+
+
+- Ingress Table 21 Destination Lookup
++ Ingress Table 23 Destination Lookup
+
+ MC_UNKNOWN
multicast group, which
+- ovn-northd
populates with all enabled logical ports that
+- accept unknown destination packets. As a small optimization, if no
+- logical ports accept unknown destination packets,
+- ovn-northd
omits this multicast group and logical flow.
++ One priority-0 fallback flow that matches all packets with the
++ action outport = get_fdb(eth.dst); next;
. The action
++ get_fdb
gets the port for the eth.dst
++ in the MAC learning table of the logical switch datapath. If there
++ is no entry for eth.dst
in the MAC learning table,
++ then it stores none
in the outport
.
++ Ingress Table 23 Destination unknown
++
++
++
+
+@@ -1926,6 +2067,27 @@ next;
+
++
++
++ outport == none
then
++ outputs them to the MC_UNKNOWN
multicast group, which
++ ovn-northd
populates with all enabled logical ports
++ that accept unknown destination packets. As a small optimization,
++ if no logical ports accept unknown destination packets,
++ ovn-northd
omits this multicast group and logical
++ flow.
++
++
++ outport == none
++ and drops the packets.
++ get_fdb
action.
+
+ For each BFD port the two following priority-110 flows are added
++ to manage BFD traffic:
++
++
ip4.src
or ip6.src
is any IP
++ address owned by the router port and udp.dst == 3784
++
, the packet is advanced to the next pipeline stage.
++ ip4.dst
or ip6.dst
is any IP
++ address owned by the router port and udp.dst == 3784
++
, the handle_bfd_msg
action is executed.
++
+ L3 admission control: A priority-100 flow drops packets that match
+@@ -2449,6 +2611,16 @@ icmp6 {
+ with an action ct_snat;
.
+
++ If the Gateway router is configured with
++ lb_force_snat_ip=router_ip
then for every logical router
++ port P attached to the Gateway router with the router ip
++ B, a priority-110 flow is added with the match
++ inport == P && ip4.dst == B
or
++ inport == P && ip6.dst == B
++ with an action ct_snat;
.
++
+ If the Gateway router has been configured to force SNAT any
+ previously load-balanced packets to B, a priority-100 flow
+@@ -2592,6 +2764,15 @@ icmp6 {
+ packets, the above action will be replaced by
+ flags.force_snat_for_lb = 1; ct_dnat;
.
+
--reject
option and
++ it has no active backends, a TCP reset segment (for tcp) or an ICMP
++ port unreachable packet (for all other kind of traffic) will be sent
++ whenever an incoming packet is received for this load-balancer.
++ Please note using --reject
option will disable
++ empty_lb SB controller event for this load balancer.
++ Ingress Table 6: DNAT on Gateway Routers
+@@ -3022,14 +3203,36 @@ outport = P;
+
+
+- If the policy action is reroute
, then the logical
+- flow is added with the following actions:
++ If the policy action is reroute
with 2 or more nexthops
++ defined, then the logical flow is added with the following actions:
++
++reg8[0..15] = GID;
++reg8[16..31] = select(1,..n);
++
++
++
++ where GID is the ECMP group id generated by
++ ovn-northd
for this policy and n
++ is the number of nexthops. select
action
++ selects one of the nexthop member id, stores it in the register
++ reg8[16..31]
and advances the packet to the
++ next stage.
++
++ If the policy action is reroute
with just one nexhop,
++ then the logical flow is added with the following actions:
+
+ [xx]reg0 = H;
+ eth.src = E;
+ outport = P;
++reg8[0..15] = 0;
+ flags.loopback = 1;
+ next;
+
+@@ -3053,7 +3256,51 @@ next;
+
++ This table handles the ECMP for the router policies configured ++ with multiple nexthops. ++
++ ++
++ A priority-150 flow is added to advance the packet to the next stage
++ if the ECMP group id register reg8[0..15]
is 0.
++
++ For each ECMP reroute router policy with multiple nexthops,
++ a priority-100 flow is added for each nexthop H
++ with the match reg8[0..15] == GID &&
++ reg8[16..31] == M
where GID
++ is the router policy group id generated by ovn-northd
++ and M is the member id of the nexthop H
++ generated by ovn-northd
. The following actions are added
++ to the flow:
++
++[xx]reg0 = H;
++eth.src = E;
++outport = P
++"flags.loopback = 1; "
++"next;"
++
++
++
++ where H is the nexthop
defined in the
++ router policy, E is the ethernet address of the
++ logical router port from which the nexthop
is
++ reachable and P is the logical router port from
++ which the nexthop
is reachable.
++
+ Any packet that reaches this table is an IP packet whose next-hop +@@ -3239,7 +3486,7 @@ next; + + + +-
+ For distributed logical routers with distributed gateway port configured +@@ -3269,7 +3516,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; + and advances to the next table. +
+ +-+ For distributed logical routers with distributed gateway port configured +@@ -3330,7 +3577,7 @@ icmp6 { + and advances to the next table. +
+ +-+ For distributed logical routers where one of the logical router +@@ -3370,7 +3617,7 @@ icmp6 { + + + +-
+ In the common case where the Ethernet destination has been resolved, this
+@@ -3546,6 +3793,32 @@ nd_ns {
+ flags.force_snat_for_dnat == 1 && ip
with an
+ action ct_snat(B);
.
+
++ If the Gateway router in the OVN Northbound database has been
++ configured to force SNAT a packet (that has been previously
++ load-balanced) using router IP (i.e. options:lb_force_snat_ip=router_ip), then for
++ each logical router port P attached to the Gateway
++ router, a priority-110 flow matches
++ flags.force_snat_for_lb == 1 && outport == P
++
with an action ct_snat(R);
++ where R is the IP configured on the router port.
++ If R
is an IPv4 address then the match will also
++ include ip4
and if it is an IPv6 address, then the
++ match will also include ip6
.
++
++ If the logical router port P is configured with multiple
++ IPv4 and multiple IPv6 addresses, only the first IPv4 and first IPv6
++ address is considered.
++
++
+ If the Gateway router in the OVN Northbound database has been
+ configured to force SNAT a packet (that has been previously
+@@ -3553,6 +3826,9 @@ nd_ns {
+ flags.force_snat_for_lb == 1 && ip
with an
+ action ct_snat(B);
.
+
+ For each configuration in the OVN Northbound database, that asks
+ to change the source IP address of a packet from an IP address of
+@@ -3566,14 +3842,18 @@ nd_ns {
+ options, then the action would be ip4/6.src=
+ (B)
.
+
+ If the NAT rule has allowed_ext_ips
configured, then
+ there is an additional match ip4.dst == allowed_ext_ips
+
. Similarly, for IPV6, match would be ip6.dst ==
+ allowed_ext_ips
.
+
+ If the NAT rule has exempted_ext_ips
set, then
+ there is an additional flow configured at the priority + 1 of
+@@ -3582,7 +3862,9 @@ nd_ns {
+ . This flow is used to bypass the ct_snat action for a packet
+ which is destinted to exempted_ext_ips
.
+
+ A priority-0 logical flow with match 1
has actions
+ next;
.
+diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
+index 5a3227568..c81e3220c 100644
+--- a/northd/ovn-northd.c
++++ b/northd/ovn-northd.c
+@@ -38,6 +38,7 @@
+ #include "lib/ovn-util.h"
+ #include "lib/lb.h"
+ #include "ovn/actions.h"
++#include "ovn/features.h"
+ #include "ovn/logical-fields.h"
+ #include "packets.h"
+ #include "openvswitch/poll-loop.h"
+@@ -141,25 +142,28 @@ enum ovn_stage {
+ PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
+ PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
+ PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
+- PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
+- PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
+- PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
+- PIPELINE_STAGE(SWITCH, IN, ACL_HINT, 6, "ls_in_acl_hint") \
+- PIPELINE_STAGE(SWITCH, IN, ACL, 7, "ls_in_acl") \
+- PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 8, "ls_in_qos_mark") \
+- PIPELINE_STAGE(SWITCH, IN, QOS_METER, 9, "ls_in_qos_meter") \
+- PIPELINE_STAGE(SWITCH, IN, LB, 10, "ls_in_lb") \
+- PIPELINE_STAGE(SWITCH, IN, STATEFUL, 11, "ls_in_stateful") \
+- PIPELINE_STAGE(SWITCH, IN, PRE_HAIRPIN, 12, "ls_in_pre_hairpin") \
+- PIPELINE_STAGE(SWITCH, IN, NAT_HAIRPIN, 13, "ls_in_nat_hairpin") \
+- PIPELINE_STAGE(SWITCH, IN, HAIRPIN, 14, "ls_in_hairpin") \
+- PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 15, "ls_in_arp_rsp") \
+- PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 16, "ls_in_dhcp_options") \
+- PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 17, "ls_in_dhcp_response") \
+- PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 18, "ls_in_dns_lookup") \
+- PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 19, "ls_in_dns_response") \
+- PIPELINE_STAGE(SWITCH, IN, EXTERNAL_PORT, 20, "ls_in_external_port") \
+- PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 21, "ls_in_l2_lkup") \
++ PIPELINE_STAGE(SWITCH, IN, LOOKUP_FDB , 3, "ls_in_lookup_fdb") \
++ PIPELINE_STAGE(SWITCH, IN, PUT_FDB, 4, "ls_in_put_fdb") \
++ PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 5, "ls_in_pre_acl") \
++ PIPELINE_STAGE(SWITCH, IN, PRE_LB, 6, "ls_in_pre_lb") \
++ PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 7, "ls_in_pre_stateful") \
++ PIPELINE_STAGE(SWITCH, IN, ACL_HINT, 8, "ls_in_acl_hint") \
++ PIPELINE_STAGE(SWITCH, IN, ACL, 9, "ls_in_acl") \
++ PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 10, "ls_in_qos_mark") \
++ PIPELINE_STAGE(SWITCH, IN, QOS_METER, 11, "ls_in_qos_meter") \
++ PIPELINE_STAGE(SWITCH, IN, LB, 12, "ls_in_lb") \
++ PIPELINE_STAGE(SWITCH, IN, STATEFUL, 13, "ls_in_stateful") \
++ PIPELINE_STAGE(SWITCH, IN, PRE_HAIRPIN, 14, "ls_in_pre_hairpin") \
++ PIPELINE_STAGE(SWITCH, IN, NAT_HAIRPIN, 15, "ls_in_nat_hairpin") \
++ PIPELINE_STAGE(SWITCH, IN, HAIRPIN, 16, "ls_in_hairpin") \
++ PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 17, "ls_in_arp_rsp") \
++ PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 18, "ls_in_dhcp_options") \
++ PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 19, "ls_in_dhcp_response") \
++ PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 20, "ls_in_dns_lookup") \
++ PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 21, "ls_in_dns_response") \
++ PIPELINE_STAGE(SWITCH, IN, EXTERNAL_PORT, 22, "ls_in_external_port") \
++ PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 23, "ls_in_l2_lkup") \
++ PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") \
+ \
+ /* Logical switch egress stages. */ \
+ PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
+@@ -188,11 +192,12 @@ enum ovn_stage {
+ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \
+ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \
+ PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \
+- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 13, "lr_in_arp_resolve") \
+- PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 14, "lr_in_chk_pkt_len") \
+- PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 15,"lr_in_larger_pkts") \
+- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 16, "lr_in_gw_redirect") \
+- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 17, "lr_in_arp_request") \
++ PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 13, "lr_in_policy_ecmp") \
++ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 14, "lr_in_arp_resolve") \
++ PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 15, "lr_in_chk_pkt_len") \
++ PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 16, "lr_in_larger_pkts") \
++ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 17, "lr_in_gw_redirect") \
++ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 18, "lr_in_arp_request") \
+ \
+ /* Logical router egress stages. */ \
+ PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
+@@ -225,6 +230,12 @@ enum ovn_stage {
+ #define REGBIT_ACL_HINT_ALLOW "reg0[8]"
+ #define REGBIT_ACL_HINT_DROP "reg0[9]"
+ #define REGBIT_ACL_HINT_BLOCK "reg0[10]"
++#define REGBIT_LKUP_FDB "reg0[11]"
++#define REGBIT_HAIRPIN_REPLY "reg0[12]"
++
++#define REG_ORIG_DIP_IPV4 "reg1"
++#define REG_ORIG_DIP_IPV6 "xxreg1"
++#define REG_ORIG_TP_DPORT "reg2[0..15]"
+
+ /* Register definitions for switches and routers. */
+
+@@ -259,12 +270,29 @@ enum ovn_stage {
+ * OVS register usage:
+ *
+ * Logical Switch pipeline:
+- * +---------+----------------------------------------------+
+- * | R0 | REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN} |
+- * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} |
+- * +---------+----------------------------------------------+
+- * | R1 - R9 | UNUSED |
+- * +---------+----------------------------------------------+
++ * +----+----------------------------------------------+---+------------------+
++ * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | |
++ * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | X | |
++ * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | X | |
++ * +----+----------------------------------------------+ X | |
++ * | R1 | ORIG_DIP_IPV4 (>= IN_STATEFUL) | R | |
++ * +----+----------------------------------------------+ E | |
++ * | R2 | ORIG_TP_DPORT (>= IN_STATEFUL) | G | |
++ * +----+----------------------------------------------+ 0 | |
++ * | R3 | UNUSED | | |
++ * +----+----------------------------------------------+---+------------------+
++ * | R4 | UNUSED | | |
++ * +----+----------------------------------------------+ X | ORIG_DIP_IPV6 |
++ * | R5 | UNUSED | X | (>= IN_STATEFUL) |
++ * +----+----------------------------------------------+ R | |
++ * | R6 | UNUSED | E | |
++ * +----+----------------------------------------------+ G | |
++ * | R7 | UNUSED | 1 | |
++ * +----+----------------------------------------------+---+------------------+
++ * | R8 | UNUSED |
++ * +----+----------------------------------------------+
++ * | R9 | UNUSED |
++ * +----+----------------------------------------------+
+ *
+ * Logical Router pipeline:
+ * +-----+--------------------------+---+-----------------+---+---------------+
+@@ -608,6 +636,8 @@ struct ovn_datapath {
+ struct hmap port_tnlids;
+ uint32_t port_key_hint;
+
++ bool has_stateful_acl;
++ bool has_lb_vip;
+ bool has_unknown;
+
+ /* IPAM data. */
+@@ -633,6 +663,7 @@ struct ovn_datapath {
+
+ struct lport_addresses dnat_force_snat_addrs;
+ struct lport_addresses lb_force_snat_addrs;
++ bool lb_force_snat_router_ip;
+
+ struct ovn_port **localnet_ports;
+ size_t n_localnet_ports;
+@@ -646,6 +677,9 @@ struct ovn_datapath {
+ struct hmap nb_pgs;
+ };
+
++static bool ls_has_stateful_acl(struct ovn_datapath *od);
++static bool ls_has_lb_vip(struct ovn_datapath *od);
++
+ /* Contains a NAT entry with the external addresses pre-parsed. */
+ struct ovn_nat {
+ const struct nbrec_nat *nb;
+@@ -723,14 +757,28 @@ init_nat_entries(struct ovn_datapath *od)
+ }
+ }
+
+- if (get_force_snat_ip(od, "lb", &od->lb_force_snat_addrs)) {
+- if (od->lb_force_snat_addrs.n_ipv4_addrs) {
+- snat_ip_add(od, od->lb_force_snat_addrs.ipv4_addrs[0].addr_s,
+- NULL);
+- }
+- if (od->lb_force_snat_addrs.n_ipv6_addrs) {
+- snat_ip_add(od, od->lb_force_snat_addrs.ipv6_addrs[0].addr_s,
+- NULL);
++ /* Check if 'lb_force_snat_ip' is configured with 'router_ip'. */
++ const char *lb_force_snat =
++ smap_get(&od->nbr->options, "lb_force_snat_ip");
++ if (lb_force_snat && !strcmp(lb_force_snat, "router_ip")
++ && smap_get(&od->nbr->options, "chassis")) {
++ /* Set it to true only if its gateway router and
++ * options:lb_force_snat_ip=router_ip. */
++ od->lb_force_snat_router_ip = true;
++ } else {
++ od->lb_force_snat_router_ip = false;
++
++ /* Check if 'lb_force_snat_ip' is configured with a set of
++ * IP address(es). */
++ if (get_force_snat_ip(od, "lb", &od->lb_force_snat_addrs)) {
++ if (od->lb_force_snat_addrs.n_ipv4_addrs) {
++ snat_ip_add(od, od->lb_force_snat_addrs.ipv4_addrs[0].addr_s,
++ NULL);
++ }
++ if (od->lb_force_snat_addrs.n_ipv6_addrs) {
++ snat_ip_add(od, od->lb_force_snat_addrs.ipv6_addrs[0].addr_s,
++ NULL);
++ }
+ }
+ }
+
+@@ -872,6 +920,20 @@ ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
+ return NULL;
+ }
+
++static struct ovn_datapath *
++ovn_datapath_find_by_key(struct hmap *datapaths, uint32_t dp_key)
++{
++ struct ovn_datapath *od;
++
++ HMAP_FOR_EACH (od, key_node, datapaths) {
++ if (od->tunnel_key == dp_key) {
++ return od;
++ }
++ }
++
++ return NULL;
++}
++
+ static bool
+ ovn_datapath_is_stale(const struct ovn_datapath *od)
+ {
+@@ -1472,6 +1534,8 @@ struct ovn_port {
+
+ bool has_unknown; /* If the addresses have 'unknown' defined. */
+
++ bool has_bfd;
++
+ /* The port's peer:
+ *
+ * - A switch port S of type "router" has a router port R as a peer,
+@@ -1543,17 +1607,38 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
+ }
+ }
+
++/* Returns the ovn_port that matches 'name'. If 'prefer_bound' is true and
++ * multiple ports share the same name, gives precedence to ports bound to
++ * an ovn_datapath.
++ */
+ static struct ovn_port *
+-ovn_port_find(const struct hmap *ports, const char *name)
++ovn_port_find__(const struct hmap *ports, const char *name,
++ bool prefer_bound)
+ {
++ struct ovn_port *matched_op = NULL;
+ struct ovn_port *op;
+
+ HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
+ if (!strcmp(op->key, name)) {
+- return op;
++ matched_op = op;
++ if (!prefer_bound || op->od) {
++ return op;
++ }
+ }
+ }
+- return NULL;
++ return matched_op;
++}
++
++static struct ovn_port *
++ovn_port_find(const struct hmap *ports, const char *name)
++{
++ return ovn_port_find__(ports, name, false);
++}
++
++static struct ovn_port *
++ovn_port_find_bound(const struct hmap *ports, const char *name)
++{
++ return ovn_port_find__(ports, name, true);
+ }
+
+ /* Returns true if the logical switch port 'enabled' column is empty or
+@@ -2336,15 +2421,13 @@ join_logical_ports(struct northd_context *ctx,
+ for (size_t i = 0; i < od->nbs->n_ports; i++) {
+ const struct nbrec_logical_switch_port *nbsp
+ = od->nbs->ports[i];
+- struct ovn_port *op = ovn_port_find(ports, nbsp->name);
+- if (op && op->sb->datapath == od->sb) {
+- if (op->nbsp || op->nbrp) {
+- static struct vlog_rate_limit rl
+- = VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "duplicate logical port %s",
+- nbsp->name);
+- continue;
+- }
++ struct ovn_port *op = ovn_port_find_bound(ports, nbsp->name);
++ if (op && (op->od || op->nbsp || op->nbrp)) {
++ static struct vlog_rate_limit rl
++ = VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "duplicate logical port %s", nbsp->name);
++ continue;
++ } else if (op && (!op->sb || op->sb->datapath == od->sb)) {
+ ovn_port_set_nb(op, nbsp, NULL);
+ ovs_list_remove(&op->list);
+
+@@ -2435,16 +2518,15 @@ join_logical_ports(struct northd_context *ctx,
+ continue;
+ }
+
+- struct ovn_port *op = ovn_port_find(ports, nbrp->name);
+- if (op && op->sb->datapath == od->sb) {
+- if (op->nbsp || op->nbrp) {
+- static struct vlog_rate_limit rl
+- = VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "duplicate logical router port %s",
+- nbrp->name);
+- destroy_lport_addresses(&lrp_networks);
+- continue;
+- }
++ struct ovn_port *op = ovn_port_find_bound(ports, nbrp->name);
++ if (op && (op->od || op->nbsp || op->nbrp)) {
++ static struct vlog_rate_limit rl
++ = VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "duplicate logical router port %s",
++ nbrp->name);
++ destroy_lport_addresses(&lrp_networks);
++ continue;
++ } else if (op && (!op->sb || op->sb->datapath == od->sb)) {
+ ovn_port_set_nb(op, NULL, nbrp);
+ ovs_list_remove(&op->list);
+ ovs_list_push_back(both, &op->list);
+@@ -2487,7 +2569,7 @@ join_logical_ports(struct northd_context *ctx,
+ char *redirect_name =
+ ovn_chassis_redirect_name(nbrp->name);
+ struct ovn_port *crp = ovn_port_find(ports, redirect_name);
+- if (crp && crp->sb->datapath == od->sb) {
++ if (crp && crp->sb && crp->sb->datapath == od->sb) {
+ crp->derived = true;
+ ovn_port_set_nb(crp, NULL, nbrp);
+ ovs_list_remove(&crp->list);
+@@ -3179,6 +3261,12 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+ } else {
+ sbrec_port_binding_set_ha_chassis_group(op->sb, NULL);
+ }
++ } else if (op->sb->ha_chassis_group) {
++ /* Clear the port bindings ha_chassis_group if the type is
++ * not external and if this column is set. This can happen
++ * when an external port is reset to type normal and
++ * ha_chassis_group cleared in the same transaction. */
++ sbrec_port_binding_set_ha_chassis_group(op->sb, NULL);
+ }
+ } else {
+ const char *chassis = NULL;
+@@ -3308,6 +3396,14 @@ ovn_port_update_sbrec(struct northd_context *ctx,
+ if (op->tunnel_key != op->sb->tunnel_key) {
+ sbrec_port_binding_set_tunnel_key(op->sb, op->tunnel_key);
+ }
++
++ /* ovn-controller will update 'Port_Binding.up' only if it was explicitly
++ * set to 'false'.
++ */
++ if (!op->sb->n_up) {
++ bool up = false;
++ sbrec_port_binding_set_up(op->sb, &up, 1);
++ }
+ }
+
+ /* Remove mac_binding entries that refer to logical_ports which are
+@@ -3340,6 +3436,26 @@ cleanup_sb_ha_chassis_groups(struct northd_context *ctx,
+ }
+ }
+
++static void
++cleanup_stale_fdp_entries(struct northd_context *ctx, struct hmap *datapaths)
++{
++ const struct sbrec_fdb *fdb_e, *next;
++ SBREC_FDB_FOR_EACH_SAFE (fdb_e, next, ctx->ovnsb_idl) {
++ bool delete = true;
++ struct ovn_datapath *od
++ = ovn_datapath_find_by_key(datapaths, fdb_e->dp_key);
++ if (od) {
++ if (ovn_tnlid_present(&od->port_tnlids, fdb_e->port_key)) {
++ delete = false;
++ }
++ }
++
++ if (delete) {
++ sbrec_fdb_delete(fdb_e);
++ }
++ }
++}
++
+ struct service_monitor_info {
+ struct hmap_node hmap_node;
+ const struct sbrec_service_monitor *sbrec_mon;
+@@ -3436,12 +3552,12 @@ ovn_lb_svc_create(struct northd_context *ctx, struct ovn_northd_lb *lb,
+ }
+
+ static
+-void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip,
+- struct ovn_northd_lb_vip *lb_vip_nb,
+- struct ds *action,
+- char *selection_fields)
++void build_lb_vip_actions(struct ovn_lb_vip *lb_vip,
++ struct ovn_northd_lb_vip *lb_vip_nb,
++ struct ds *action, char *selection_fields,
++ bool ls_dp)
+ {
+- bool skip_hash_fields = false;
++ bool skip_hash_fields = false, reject = false;
+
+ if (lb_vip_nb->lb_health_check) {
+ ds_put_cstr(action, "ct_lb(backends=");
+@@ -3463,18 +3579,30 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip,
+ }
+
+ if (!n_active_backends) {
+- skip_hash_fields = true;
+- ds_clear(action);
+- ds_put_cstr(action, "drop;");
++ if (!lb_vip->empty_backend_rej) {
++ ds_clear(action);
++ ds_put_cstr(action, "drop;");
++ skip_hash_fields = true;
++ } else {
++ reject = true;
++ }
+ } else {
+ ds_chomp(action, ',');
+ ds_put_cstr(action, ");");
+ }
++ } else if (lb_vip->empty_backend_rej && !lb_vip->n_backends) {
++ reject = true;
+ } else {
+ ds_put_format(action, "ct_lb(backends=%s);", lb_vip_nb->backend_ips);
+ }
+
+- if (!skip_hash_fields && selection_fields && selection_fields[0]) {
++ if (reject) {
++ int stage = ls_dp ? ovn_stage_get_table(S_SWITCH_OUT_QOS_MARK)
++ : ovn_stage_get_table(S_ROUTER_OUT_SNAT);
++ ds_clear(action);
++ ds_put_format(action, "reg0 = 0; reject { outport <-> inport; "
++ "next(pipeline=egress,table=%d);};", stage);
++ } else if (!skip_hash_fields && selection_fields && selection_fields[0]) {
+ ds_chomp(action, ';');
+ ds_chomp(action, ')');
+ ds_put_format(action, "; hash_fields=\"%s\");", selection_fields);
+@@ -3547,10 +3675,18 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+ /* Create SB Load balancer records if not present and sync
+ * the SB load balancer columns. */
+ HMAP_FOR_EACH (lb, hmap_node, lbs) {
++
+ if (!lb->n_dps) {
+ continue;
+ }
+
++ /* Store the fact that northd provides the original (destination IP +
++ * transport port) tuple.
++ */
++ struct smap options;
++ smap_clone(&options, &lb->nlb->options);
++ smap_replace(&options, "hairpin_orig_tuple", "true");
++
+ if (!lb->slb) {
+ sbrec_lb = sbrec_load_balancer_insert(ctx->ovnsb_txn);
+ lb->slb = sbrec_lb;
+@@ -3564,9 +3700,11 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
+ sbrec_load_balancer_set_name(lb->slb, lb->nlb->name);
+ sbrec_load_balancer_set_vips(lb->slb, &lb->nlb->vips);
+ sbrec_load_balancer_set_protocol(lb->slb, lb->nlb->protocol);
++ sbrec_load_balancer_set_options(lb->slb, &options);
+ sbrec_load_balancer_set_datapaths(
+ lb->slb, (struct sbrec_datapath_binding **)lb->dps,
+ lb->n_dps);
++ smap_destroy(&options);
+ }
+
+ /* Set the list of associated load balanacers to a logical switch
+@@ -4822,7 +4960,7 @@ ovn_ls_port_group_destroy(struct hmap *nb_pgs)
+ }
+
+ static bool
+-has_stateful_acl(struct ovn_datapath *od)
++ls_has_stateful_acl(struct ovn_datapath *od)
+ {
+ for (size_t i = 0; i < od->nbs->n_acls; i++) {
+ struct nbrec_acl *acl = od->nbs->acls[i];
+@@ -4905,50 +5043,82 @@ build_lswitch_input_port_sec_od(
+ }
+
+ static void
+-build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
+- struct hmap *lflows)
++build_lswitch_learn_fdb_op(
++ struct ovn_port *op, struct hmap *lflows,
++ struct ds *actions, struct ds *match)
+ {
+- struct ds actions = DS_EMPTY_INITIALIZER;
+- struct ds match = DS_EMPTY_INITIALIZER;
+- struct ovn_port *op;
++ if (op->nbsp && !op->n_ps_addrs && !strcmp(op->nbsp->type, "") &&
++ op->has_unknown) {
++ ds_clear(match);
++ ds_clear(actions);
++ ds_put_format(match, "inport == %s", op->json_key);
++ ds_put_format(actions, REGBIT_LKUP_FDB
++ " = lookup_fdb(inport, eth.src); next;");
++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_LOOKUP_FDB, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbsp->header_);
+
+- /* Egress table 8: Egress port security - IP (priorities 90 and 80)
+- * if port security enabled.
+- *
+- * Egress table 9: Egress port security - L2 (priorities 50 and 150).
+- *
+- * Priority 50 rules implement port security for enabled logical port.
+- *
+- * Priority 150 rules drop packets to disabled logical ports, so that
+- * they don't even receive multicast or broadcast packets.
+- */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp || lsp_is_external(op->nbsp)) {
+- continue;
+- }
++ ds_put_cstr(match, " && "REGBIT_LKUP_FDB" == 0");
++ ds_clear(actions);
++ ds_put_cstr(actions, "put_fdb(inport, eth.src); next;");
++ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_PUT_FDB, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbsp->header_);
++ }
++}
+
+- ds_clear(&actions);
+- ds_clear(&match);
++static void
++build_lswitch_learn_fdb_od(
++ struct ovn_datapath *od, struct hmap *lflows)
++{
++
++ if (od->nbs) {
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_LOOKUP_FDB, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_PUT_FDB, 0, "1", "next;");
++ }
++}
++
++/* Egress table 8: Egress port security - IP (priorities 90 and 80)
++ * if port security enabled.
++ *
++ * Egress table 9: Egress port security - L2 (priorities 50 and 150).
++ *
++ * Priority 50 rules implement port security for enabled logical port.
++ *
++ * Priority 150 rules drop packets to disabled logical ports, so that
++ * they don't even receive multicast or broadcast packets.
++ */
++static void
++build_lswitch_output_port_sec_op(struct ovn_port *op,
++ struct hmap *lflows,
++ struct ds *match,
++ struct ds *actions)
++{
++
++ if (op->nbsp && (!lsp_is_external(op->nbsp))) {
++
++ ds_clear(actions);
++ ds_clear(match);
+
+- ds_put_format(&match, "outport == %s", op->json_key);
++ ds_put_format(match, "outport == %s", op->json_key);
+ if (lsp_is_enabled(op->nbsp)) {
+ build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
+- &match);
++ match);
+
+ if (!strcmp(op->nbsp->type, "localnet")) {
+ const char *queue_id = smap_get(&op->sb->options,
+ "qdisc_queue_id");
+ if (queue_id) {
+- ds_put_format(&actions, "set_queue(%s); ", queue_id);
++ ds_put_format(actions, "set_queue(%s); ", queue_id);
+ }
+ }
+- ds_put_cstr(&actions, "output;");
++ ds_put_cstr(actions, "output;");
+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2,
+- 50, ds_cstr(&match), ds_cstr(&actions),
++ 50, ds_cstr(match), ds_cstr(actions),
+ &op->nbsp->header_);
+ } else {
+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2,
+- 150, ds_cstr(&match), "drop;",
++ 150, ds_cstr(match), "drop;",
+ &op->nbsp->header_);
+ }
+
+@@ -4956,23 +5126,20 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
+ build_port_security_ip(P_OUT, op, lflows, &op->nbsp->header_);
+ }
+ }
++}
+
+- /* Egress tables 8: Egress port security - IP (priority 0)
+- * Egress table 9: Egress port security L2 - multicast/broadcast
+- * (priority 100). */
+- struct ovn_datapath *od;
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbs) {
+- continue;
+- }
+-
++/* Egress tables 8: Egress port security - IP (priority 0)
++ * Egress table 9: Egress port security L2 - multicast/broadcast
++ * (priority 100). */
++static void
++build_lswitch_output_port_sec_od(struct ovn_datapath *od,
++ struct hmap *lflows)
++{
++ if (od->nbs) {
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
+ "output;");
+ }
+-
+- ds_destroy(&match);
+- ds_destroy(&actions);
+ }
+
+ static void
+@@ -5008,8 +5175,6 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
+ static void
+ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
+ {
+- bool has_stateful = has_stateful_acl(od);
+-
+ /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
+ * allowed by default. */
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
+@@ -5024,7 +5189,7 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
+ /* If there are any stateful ACL rules in this datapath, we must
+ * send all IP packets through the conntrack action, which handles
+ * defragmentation, in order to match L4 headers. */
+- if (has_stateful) {
++ if (od->has_stateful_acl) {
+ for (size_t i = 0; i < od->n_router_ports; i++) {
+ skip_port_from_conntrack(od, od->router_ports[i],
+ S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
+@@ -5084,7 +5249,10 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
+ struct nbrec_load_balancer *lb,
+ int pl, struct shash *meter_groups)
+ {
+- if (!controller_event_en || lb_vip->n_backends) {
++ bool controller_event = smap_get_bool(&lb->options, "event", false) ||
++ controller_event_en; /* deprecated */
++ if (!controller_event || lb_vip->n_backends ||
++ lb_vip->empty_backend_rej) {
+ return;
+ }
+
+@@ -5124,7 +5292,7 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
+ }
+
+ static bool
+-has_lb_vip(struct ovn_datapath *od)
++ls_has_lb_vip(struct ovn_datapath *od)
+ {
+ for (int i = 0; i < od->nbs->n_load_balancer; i++) {
+ struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
+@@ -5267,6 +5435,13 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+ for (size_t i = 0; i < ARRAY_SIZE(stages); i++) {
+ enum ovn_stage stage = stages[i];
+
++ /* In any case, advance to the next stage. */
++ ovn_lflow_add(lflows, od, stage, 0, "1", "next;");
++
++ if (!od->has_stateful_acl && !od->has_lb_vip) {
++ continue;
++ }
++
+ /* New, not already established connections, may hit either allow
+ * or drop ACLs. For allow ACLs, the connection must also be committed
+ * to conntrack so we set REGBIT_ACL_HINT_ALLOW_NEW.
+@@ -5327,9 +5502,6 @@ build_acl_hints(struct ovn_datapath *od, struct hmap *lflows)
+ ovn_lflow_add(lflows, od, stage, 1, "ct.est && ct_label.blocked == 0",
+ REGBIT_ACL_HINT_BLOCK " = 1; "
+ "next;");
+-
+- /* In any case, advance to the next stage. */
+- ovn_lflow_add(lflows, od, stage, 0, "1", "next;");
+ }
+ }
+
+@@ -5661,7 +5833,7 @@ static void
+ build_acls(struct ovn_datapath *od, struct hmap *lflows,
+ struct hmap *port_groups, const struct shash *meter_groups)
+ {
+- bool has_stateful = (has_stateful_acl(od) || has_lb_vip(od));
++ bool has_stateful = od->has_stateful_acl || od->has_lb_vip;
+
+ /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
+ * default. A related rule at priority 1 is added below if there
+@@ -5930,7 +6102,7 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows)
+ }
+ }
+
+- if (has_lb_vip(od)) {
++ if (od->has_lb_vip) {
+ /* Ingress and Egress LB Table (Priority 65534).
+ *
+ * Send established traffic through conntrack for just NAT. */
+@@ -5953,11 +6125,20 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
+ struct ovn_lb_vip *lb_vip = &lb->vips[i];
+ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i];
+
++ struct ds action = DS_EMPTY_INITIALIZER;
+ const char *ip_match = NULL;
++
++ /* Store the original destination IP to be used when generating
++ * hairpin flows.
++ */
+ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+ ip_match = "ip4";
++ ds_put_format(&action, REG_ORIG_DIP_IPV4 " = %s; ",
++ lb_vip->vip_str);
+ } else {
+ ip_match = "ip6";
++ ds_put_format(&action, REG_ORIG_DIP_IPV6 " = %s; ",
++ lb_vip->vip_str);
+ }
+
+ const char *proto = NULL;
+@@ -5970,12 +6151,17 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows,
+ proto = "sctp";
+ }
+ }
++
++ /* Store the original destination port to be used when generating
++ * hairpin flows.
++ */
++ ds_put_format(&action, REG_ORIG_TP_DPORT " = %"PRIu16"; ",
++ lb_vip->vip_port);
+ }
+
+ /* New connections in Ingress table. */
+- struct ds action = DS_EMPTY_INITIALIZER;
+- build_lb_vip_ct_lb_actions(lb_vip, lb_vip_nb, &action,
+- lb->selection_fields);
++ build_lb_vip_actions(lb_vip, lb_vip_nb, &action,
++ lb->selection_fields, true);
+
+ struct ds match = DS_EMPTY_INITIALIZER;
+ ds_put_format(&match, "ct.new && %s.dst == %s", ip_match,
+@@ -6021,9 +6207,39 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
+ * REGBIT_CONNTRACK_COMMIT is set for new connections and
+ * REGBIT_CONNTRACK_NAT is set for established connections. So they
+ * don't overlap.
++ *
++ * In the ingress pipeline, also store the original destination IP and
++ * transport port to be used when detecting hairpin packets.
+ */
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
+- REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
++ const char *lb_protocols[] = {"tcp", "udp", "sctp"};
++ struct ds actions = DS_EMPTY_INITIALIZER;
++ struct ds match = DS_EMPTY_INITIALIZER;
++
++ for (size_t i = 0; i < ARRAY_SIZE(lb_protocols); i++) {
++ ds_clear(&match);
++ ds_clear(&actions);
++ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip4 && %s",
++ lb_protocols[i]);
++ ds_put_format(&actions, REG_ORIG_DIP_IPV4 " = ip4.dst; "
++ REG_ORIG_TP_DPORT " = %s.dst; ct_lb;",
++ lb_protocols[i]);
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
++ ds_cstr(&match), ds_cstr(&actions));
++
++ ds_clear(&match);
++ ds_clear(&actions);
++ ds_put_format(&match, REGBIT_CONNTRACK_NAT" == 1 && ip6 && %s",
++ lb_protocols[i]);
++ ds_put_format(&actions, REG_ORIG_DIP_IPV6 " = ip6.dst; "
++ REG_ORIG_TP_DPORT " = %s.dst; ct_lb;",
++ lb_protocols[i]);
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
++ ds_cstr(&match), ds_cstr(&actions));
++ }
++
++ ds_destroy(&actions);
++ ds_destroy(&match);
++
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
+ REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
+
+@@ -6051,40 +6267,50 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows)
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 0, "1", "next;");
+
+- if (has_lb_vip(od)) {
+- /* Check if the packet needs to be hairpinned. */
+- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100,
+- "ip && ct.trk && ct.dnat",
+- REGBIT_HAIRPIN " = chk_lb_hairpin(); next;",
++ if (od->has_lb_vip) {
++ /* Check if the packet needs to be hairpinned.
++ * Set REGBIT_HAIRPIN in the original direction and
++ * REGBIT_HAIRPIN_REPLY in the reply direction.
++ */
++ ovn_lflow_add_with_hint(
++ lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100, "ip && ct.trk",
++ REGBIT_HAIRPIN " = chk_lb_hairpin(); "
++ REGBIT_HAIRPIN_REPLY " = chk_lb_hairpin_reply(); "
++ "next;",
++ &od->nbs->header_);
++
++ /* If packet needs to be hairpinned, snat the src ip with the VIP
++ * for new sessions. */
++ ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 100,
++ "ip && ct.new && ct.trk"
++ " && "REGBIT_HAIRPIN " == 1",
++ "ct_snat_to_vip; next;",
+ &od->nbs->header_);
+
+- /* Check if the packet is a reply of hairpinned traffic. */
+- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 90, "ip",
+- REGBIT_HAIRPIN " = chk_lb_hairpin_reply(); "
+- "next;", &od->nbs->header_);
+-
+- /* If packet needs to be hairpinned, snat the src ip with the VIP. */
++ /* If packet needs to be hairpinned, for established sessions there
++ * should already be an SNAT conntrack entry.
++ */
+ ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 100,
+- "ip && (ct.new || ct.est) && ct.trk && ct.dnat"
++ "ip && ct.est && ct.trk"
+ " && "REGBIT_HAIRPIN " == 1",
+- "ct_snat_to_vip; next;",
++ "ct_snat;",
+ &od->nbs->header_);
+
+ /* For the reply of hairpinned traffic, snat the src ip to the VIP. */
+ ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 90,
+- "ip && "REGBIT_HAIRPIN " == 1", "ct_snat;",
++ "ip && "REGBIT_HAIRPIN_REPLY " == 1",
++ "ct_snat;",
+ &od->nbs->header_);
+
+ /* Ingress Hairpin table.
+ * - Priority 1: Packets that were SNAT-ed for hairpinning should be
+ * looped back (i.e., swap ETH addresses and send back on inport).
+ */
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 1,
+- REGBIT_HAIRPIN " == 1",
+- "eth.dst <-> eth.src;"
+- "outport = inport;"
+- "flags.loopback = 1;"
+- "output;");
++ ovn_lflow_add(
++ lflows, od, S_SWITCH_IN_HAIRPIN, 1,
++ "("REGBIT_HAIRPIN " == 1 || " REGBIT_HAIRPIN_REPLY " == 1)",
++ "eth.dst <-> eth.src; outport = inport; flags.loopback = 1; "
++ "output;");
+ }
+ }
+
+@@ -6754,9 +6980,7 @@ is_vlan_transparent(const struct ovn_datapath *od)
+ }
+
+ static void
+-build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+- struct hmap *lflows, struct hmap *mcgroups,
+- struct hmap *igmp_groups, struct hmap *lbs)
++build_lswitch_flows(struct hmap *datapaths, struct hmap *lflows)
+ {
+ /* This flow table structure is documented in ovn-northd(8), so please
+ * update ovn-northd.8.xml if you change anything. */
+@@ -6765,32 +6989,111 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ struct ds actions = DS_EMPTY_INITIALIZER;
+ struct ovn_datapath *od;
+
+- /* Ingress table 13: ARP/ND responder, skip requests coming from localnet
+- * and vtep ports. (priority 100); see ovn-northd.8.xml for the
+- * rationale. */
+- struct ovn_port *op;
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp) {
++ /* Ingress table 24: Destination lookup for unknown MACs (priority 0). */
++ HMAP_FOR_EACH (od, key_node, datapaths) {
++ if (!od->nbs) {
+ continue;
+ }
+
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
++ "outport = get_fdb(eth.dst); next;");
++
++ if (od->has_unknown) {
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 50,
++ "outport == \"none\"",
++ "outport = \""MC_UNKNOWN"\"; output;");
++ } else {
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 50,
++ "outport == \"none\"", "drop;");
++ }
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_UNKNOWN, 0, "1",
++ "output;");
++ }
++
++ ds_destroy(&match);
++ ds_destroy(&actions);
++}
++
++/* Build pre-ACL and ACL tables for both ingress and egress.
++ * Ingress tables 3 through 10. Egress tables 0 through 7. */
++static void
++build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
++ struct hmap *port_groups,
++ struct hmap *lflows,
++ struct shash *meter_groups,
++ struct hmap *lbs)
++{
++ if (od->nbs) {
++ od->has_stateful_acl = ls_has_stateful_acl(od);
++ od->has_lb_vip = ls_has_lb_vip(od);
++
++ build_pre_acls(od, lflows);
++ build_pre_lb(od, lflows, meter_groups, lbs);
++ build_pre_stateful(od, lflows);
++ build_acl_hints(od, lflows);
++ build_acls(od, lflows, port_groups, meter_groups);
++ build_qos(od, lflows);
++ build_lb(od, lflows);
++ build_stateful(od, lflows, lbs);
++ build_lb_hairpin(od, lflows);
++ }
++}
++
++/* Logical switch ingress table 0: Admission control framework (priority
++ * 100). */
++static void
++build_lswitch_lflows_admission_control(struct ovn_datapath *od,
++ struct hmap *lflows)
++{
++ if (od->nbs) {
++ /* Logical VLANs not supported. */
++ if (!is_vlan_transparent(od)) {
++ /* Block logical VLANs. */
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100,
++ "vlan.present", "drop;");
++ }
++
++ /* Broadcast/multicast source address is invalid. */
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
++ "drop;");
++
++ /* Port security flows have priority 50
++ * (see build_lswitch_input_port_sec()) and will continue
++ * to the next table if packet source is acceptable. */
++ }
++}
++
++/* Ingress table 13: ARP/ND responder, skip requests coming from localnet
++ * and vtep ports. (priority 100); see ovn-northd.8.xml for the
++ * rationale. */
++
++static void
++build_lswitch_arp_nd_responder_skip_local(struct ovn_port *op,
++ struct hmap *lflows,
++ struct ds *match)
++{
++ if (op->nbsp) {
+ if ((!strcmp(op->nbsp->type, "localnet")) ||
+ (!strcmp(op->nbsp->type, "vtep"))) {
+- ds_clear(&match);
+- ds_put_format(&match, "inport == %s", op->json_key);
++ ds_clear(match);
++ ds_put_format(match, "inport == %s", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP,
+- 100, ds_cstr(&match), "next;",
++ 100, ds_cstr(match), "next;",
+ &op->nbsp->header_);
+ }
+ }
++}
+
+- /* Ingress table 13: ARP/ND responder, reply for known IPs.
+- * (priority 50). */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp) {
+- continue;
+- }
+-
++/* Ingress table 13: ARP/ND responder, reply for known IPs.
++ * (priority 50). */
++static void
++build_lswitch_arp_nd_responder_known_ips(struct ovn_port *op,
++ struct hmap *lflows,
++ struct hmap *ports,
++ struct ds *actions,
++ struct ds *match)
++{
++ if (op->nbsp) {
+ if (!strcmp(op->nbsp->type, "virtual")) {
+ /* Handle
+ * - GARPs for virtual ip which belongs to a logical port
+@@ -6806,7 +7109,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ "virtual-parents");
+ if (!virtual_ip || !virtual_parents ||
+ !ip_parse(virtual_ip, &ip)) {
+- continue;
++ return;
+ }
+
+ char *tokstr = xstrdup(virtual_parents);
+@@ -6821,21 +7124,21 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ continue;
+ }
+
+- ds_clear(&match);
+- ds_put_format(&match, "inport == \"%s\" && "
++ ds_clear(match);
++ ds_put_format(match, "inport == \"%s\" && "
+ "((arp.op == 1 && arp.spa == %s && "
+ "arp.tpa == %s) || (arp.op == 2 && "
+ "arp.spa == %s))",
+ vparent, virtual_ip, virtual_ip,
+ virtual_ip);
+- ds_clear(&actions);
+- ds_put_format(&actions,
++ ds_clear(actions);
++ ds_put_format(actions,
+ "bind_vport(%s, inport); "
+ "next;",
+ op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_ARP_ND_RSP, 100,
+- ds_cstr(&match), ds_cstr(&actions),
++ ds_cstr(match), ds_cstr(actions),
+ &vp->nbsp->header_);
+ }
+
+@@ -6850,20 +7153,20 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ if (check_lsp_is_up &&
+ !lsp_is_up(op->nbsp) && !lsp_is_router(op->nbsp) &&
+ strcmp(op->nbsp->type, "localport")) {
+- continue;
++ return;
+ }
+
+ if (lsp_is_external(op->nbsp) || op->has_unknown) {
+- continue;
++ return;
+ }
+
+ for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
+- ds_clear(&match);
+- ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
++ ds_clear(match);
++ ds_put_format(match, "arp.tpa == %s && arp.op == 1",
+ op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+- ds_clear(&actions);
+- ds_put_format(&actions,
++ ds_clear(actions);
++ ds_put_format(actions,
+ "eth.dst = eth.src; "
+ "eth.src = %s; "
+ "arp.op = 2; /* ARP reply */ "
+@@ -6878,8 +7181,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_ARP_ND_RSP, 50,
+- ds_cstr(&match),
+- ds_cstr(&actions),
++ ds_cstr(match),
++ ds_cstr(actions),
+ &op->nbsp->header_);
+
+ /* Do not reply to an ARP request from the port that owns
+@@ -6894,10 +7197,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ * address is intended to detect situations where the
+ * network is not working as configured, so dropping the
+ * request would frustrate that intent.) */
+- ds_put_format(&match, " && inport == %s", op->json_key);
++ ds_put_format(match, " && inport == %s", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_ARP_ND_RSP, 100,
+- ds_cstr(&match), "next;",
++ ds_cstr(match), "next;",
+ &op->nbsp->header_);
+ }
+
+@@ -6905,15 +7208,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ * unicast IPv6 address and its all-nodes multicast address,
+ * but always respond with the unicast IPv6 address. */
+ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+- ds_clear(&match);
+- ds_put_format(&match,
++ ds_clear(match);
++ ds_put_format(match,
+ "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
+ op->lsp_addrs[i].ipv6_addrs[j].addr_s,
+ op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
+ op->lsp_addrs[i].ipv6_addrs[j].addr_s);
+
+- ds_clear(&actions);
+- ds_put_format(&actions,
++ ds_clear(actions);
++ ds_put_format(actions,
+ "%s { "
+ "eth.src = %s; "
+ "ip6.src = %s; "
+@@ -6930,93 +7233,99 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ op->lsp_addrs[i].ea_s);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_ARP_ND_RSP, 50,
+- ds_cstr(&match),
+- ds_cstr(&actions),
++ ds_cstr(match),
++ ds_cstr(actions),
+ &op->nbsp->header_);
+
+ /* Do not reply to a solicitation from the port that owns
+ * the address (otherwise DAD detection will fail). */
+- ds_put_format(&match, " && inport == %s", op->json_key);
++ ds_put_format(match, " && inport == %s", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_ARP_ND_RSP, 100,
+- ds_cstr(&match), "next;",
++ ds_cstr(match), "next;",
+ &op->nbsp->header_);
+ }
+ }
+ }
+ }
++}
+
+- /* Ingress table 13: ARP/ND responder, by default goto next.
+- * (priority 0)*/
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbs) {
+- continue;
+- }
+-
++/* Ingress table 13: ARP/ND responder, by default goto next.
++ * (priority 0)*/
++static void
++build_lswitch_arp_nd_responder_default(struct ovn_datapath *od,
++ struct hmap *lflows)
++{
++ if (od->nbs) {
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
+ }
++}
+
+- /* Ingress table 13: ARP/ND responder for service monitor source ip.
+- * (priority 110)*/
+- struct ovn_northd_lb *lb;
+- HMAP_FOR_EACH (lb, hmap_node, lbs) {
+- for (size_t i = 0; i < lb->n_vips; i++) {
+- struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i];
+- if (!lb_vip_nb->lb_health_check) {
++/* Ingress table 13: ARP/ND responder for service monitor source ip.
++ * (priority 110)*/
++static void
++build_lswitch_arp_nd_service_monitor(struct ovn_northd_lb *lb,
++ struct hmap *lflows,
++ struct ds *actions,
++ struct ds *match)
++{
++ for (size_t i = 0; i < lb->n_vips; i++) {
++ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[i];
++ if (!lb_vip_nb->lb_health_check) {
++ continue;
++ }
++
++ for (size_t j = 0; j < lb_vip_nb->n_backends; j++) {
++ struct ovn_northd_lb_backend *backend_nb =
++ &lb_vip_nb->backends_nb[j];
++ if (!backend_nb->op || !backend_nb->svc_mon_src_ip) {
+ continue;
+ }
+
+- for (size_t j = 0; j < lb_vip_nb->n_backends; j++) {
+- struct ovn_northd_lb_backend *backend_nb =
+- &lb_vip_nb->backends_nb[j];
+- if (!backend_nb->op || !backend_nb->svc_mon_src_ip) {
+- continue;
+- }
+-
+- ds_clear(&match);
+- ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+- backend_nb->svc_mon_src_ip);
+- ds_clear(&actions);
+- ds_put_format(&actions,
+- "eth.dst = eth.src; "
+- "eth.src = %s; "
+- "arp.op = 2; /* ARP reply */ "
+- "arp.tha = arp.sha; "
+- "arp.sha = %s; "
+- "arp.tpa = arp.spa; "
+- "arp.spa = %s; "
+- "outport = inport; "
+- "flags.loopback = 1; "
+- "output;",
+- svc_monitor_mac, svc_monitor_mac,
+- backend_nb->svc_mon_src_ip);
+- ovn_lflow_add_with_hint(lflows,
+- backend_nb->op->od,
+- S_SWITCH_IN_ARP_ND_RSP, 110,
+- ds_cstr(&match), ds_cstr(&actions),
+- &lb->nlb->header_);
+- }
++ ds_clear(match);
++ ds_put_format(match, "arp.tpa == %s && arp.op == 1",
++ backend_nb->svc_mon_src_ip);
++ ds_clear(actions);
++ ds_put_format(actions,
++ "eth.dst = eth.src; "
++ "eth.src = %s; "
++ "arp.op = 2; /* ARP reply */ "
++ "arp.tha = arp.sha; "
++ "arp.sha = %s; "
++ "arp.tpa = arp.spa; "
++ "arp.spa = %s; "
++ "outport = inport; "
++ "flags.loopback = 1; "
++ "output;",
++ svc_monitor_mac, svc_monitor_mac,
++ backend_nb->svc_mon_src_ip);
++ ovn_lflow_add_with_hint(lflows,
++ backend_nb->op->od,
++ S_SWITCH_IN_ARP_ND_RSP, 110,
++ ds_cstr(match), ds_cstr(actions),
++ &lb->nlb->header_);
+ }
+ }
++}
+
+
+- /* Logical switch ingress table 14 and 15: DHCP options and response
+- * priority 100 flows. */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp) {
+- continue;
+- }
+-
++/* Logical switch ingress table 14 and 15: DHCP options and response
++ * priority 100 flows. */
++static void
++build_lswitch_dhcp_options_and_response(struct ovn_port *op,
++ struct hmap *lflows)
++{
++ if (op->nbsp) {
+ if (!lsp_is_enabled(op->nbsp) || lsp_is_router(op->nbsp)) {
+ /* Don't add the DHCP flows if the port is not enabled or if the
+ * port is a router port. */
+- continue;
++ return;
+ }
+
+ if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
+ /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
+ */
+- continue;
++ return;
+ }
+
+ bool is_external = lsp_is_external(op->nbsp);
+@@ -7024,7 +7333,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ !op->nbsp->ha_chassis_group)) {
+ /* If it's an external port and there are no localnet ports
+ * and if it doesn't belong to an HA chassis group ignore it. */
+- continue;
++ return;
+ }
+
+ for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+@@ -7047,14 +7356,35 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ }
+ }
+ }
++}
+
+- /* Logical switch ingress table 17 and 18: DNS lookup and response
+- * priority 100 flows.
+- */
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbs || !ls_has_dns_records(od->nbs)) {
+- continue;
+- }
++/* Ingress table 14 and 15: DHCP options and response, by default goto
++ * next. (priority 0).
++ * Ingress table 16 and 17: DNS lookup and response, by default goto next.
++ * (priority 0).
++ * Ingress table 18 - External port handling, by default goto next.
++ * (priority 0). */
++static void
++build_lswitch_dhcp_and_dns_defaults(struct ovn_datapath *od,
++ struct hmap *lflows)
++{
++ if (od->nbs) {
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_SWITCH_IN_EXTERNAL_PORT, 0, "1", "next;");
++ }
++}
++
++/* Logical switch ingress table 17 and 18: DNS lookup and response
++ * priority 100 flows.
++ */
++static void
++build_lswitch_dns_lookup_and_response(struct ovn_datapath *od,
++ struct hmap *lflows)
++{
++ if (od->nbs && ls_has_dns_records(od->nbs)) {
+
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
+ "udp.dst == 53",
+@@ -7071,47 +7401,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
+ dns_match, dns_action);
+ }
++}
+
+- /* Ingress table 14 and 15: DHCP options and response, by default goto
+- * next. (priority 0).
+- * Ingress table 16 and 17: DNS lookup and response, by default goto next.
+- * (priority 0).
+- * Ingress table 18 - External port handling, by default goto next.
+- * (priority 0). */
+-
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbs) {
+- continue;
+- }
+-
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_EXTERNAL_PORT, 0, "1", "next;");
+- }
+-
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp || !lsp_is_external(op->nbsp)) {
+- continue;
+- }
++/* Table 18: External port. Drop ARP request for router ips from
++ * external ports on chassis not binding those ports.
++ * This makes the router pipeline run only on the chassis
++ * binding the external ports. */
++static void
++build_lswitch_external_port(struct ovn_port *op,
++ struct hmap *lflows)
++{
++ if (op->nbsp && lsp_is_external(op->nbsp)) {
+
+- /* Table 18: External port. Drop ARP request for router ips from
+- * external ports on chassis not binding those ports.
+- * This makes the router pipeline to be run only on the chassis
+- * binding the external ports. */
+ for (size_t i = 0; i < op->od->n_localnet_ports; i++) {
+ build_drop_arp_nd_flows_for_unbound_router_ports(
+ op, op->od->localnet_ports[i], lflows);
+ }
+ }
++}
+
+- /* Ingress table 19: Destination lookup, broadcast and multicast handling
+- * (priority 70 - 100). */
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbs) {
+- continue;
+- }
++/* Ingress table 19: Destination lookup, broadcast and multicast handling
++ * (priority 70 - 100). */
++static void
++build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od,
++ struct hmap *lflows,
++ struct ds *actions)
++{
++ if (od->nbs) {
+
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 110,
+ "eth.dst == $svc_monitor_mac",
+@@ -7120,22 +7436,22 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
+
+ if (mcast_sw_info->enabled) {
+- ds_clear(&actions);
++ ds_clear(actions);
+ if (mcast_sw_info->flood_reports) {
+- ds_put_cstr(&actions,
++ ds_put_cstr(actions,
+ "clone { "
+ "outport = \""MC_MROUTER_STATIC"\"; "
+ "output; "
+ "};");
+ }
+- ds_put_cstr(&actions, "igmp;");
++ ds_put_cstr(actions, "igmp;");
+ /* Punt IGMP traffic to controller. */
+ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 100,
+- "ip4 && ip.proto == 2", ds_cstr(&actions));
++ "ip4 && ip.proto == 2", ds_cstr(actions));
+
+ /* Punt MLD traffic to controller. */
+ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 100,
+- "mldv1 || mldv2", ds_cstr(&actions));
++ "mldv1 || mldv2", ds_cstr(actions));
+
+ /* Flood all IP multicast traffic destined to 224.0.0.X to all
+ * ports - RFC 4541, section 2.1.2, item 2.
+@@ -7157,10 +7473,10 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ * handled by the L2 multicast flow.
+ */
+ if (!mcast_sw_info->flood_unregistered) {
+- ds_clear(&actions);
++ ds_clear(actions);
+
+ if (mcast_sw_info->flood_relay) {
+- ds_put_cstr(&actions,
++ ds_put_cstr(actions,
+ "clone { "
+ "outport = \""MC_MROUTER_FLOOD"\"; "
+ "output; "
+@@ -7168,7 +7484,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ }
+
+ if (mcast_sw_info->flood_static) {
+- ds_put_cstr(&actions, "outport =\""MC_STATIC"\"; output;");
++ ds_put_cstr(actions, "outport =\""MC_STATIC"\"; output;");
+ }
+
+ /* Explicitly drop the traffic if relay or static flooding
+@@ -7176,30 +7492,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ */
+ if (!mcast_sw_info->flood_relay &&
+ !mcast_sw_info->flood_static) {
+- ds_put_cstr(&actions, "drop;");
++ ds_put_cstr(actions, "drop;");
+ }
+
+ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 80,
+ "ip4.mcast || ip6.mcast",
+- ds_cstr(&actions));
++ ds_cstr(actions));
+ }
+ }
+
+ ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 70, "eth.mcast",
+ "outport = \""MC_FLOOD"\"; output;");
+ }
++}
+
+- /* Ingress table 19: Add IP multicast flows learnt from IGMP/MLD
+- * (priority 90). */
+- struct ovn_igmp_group *igmp_group;
+
+- HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) {
+- if (!igmp_group->datapath) {
+- continue;
+- }
++/* Ingress table 19: Add IP multicast flows learnt from IGMP/MLD
++ * (priority 90). */
++static void
++build_lswitch_ip_mcast_igmp_mld(struct ovn_igmp_group *igmp_group,
++ struct hmap *lflows,
++ struct ds *actions,
++ struct ds *match)
++{
++ if (igmp_group->datapath) {
+
+- ds_clear(&match);
+- ds_clear(&actions);
++ ds_clear(match);
++ ds_clear(actions);
+
+ struct mcast_switch_info *mcast_sw_info =
+ &igmp_group->datapath->mcast_info.sw;
+@@ -7211,57 +7530,62 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ ovs_be32 group_address =
+ in6_addr_get_mapped_ipv4(&igmp_group->address);
+ if (ip_is_local_multicast(group_address)) {
+- continue;
++ return;
+ }
+
+ if (mcast_sw_info->active_v4_flows >= mcast_sw_info->table_size) {
+- continue;
++ return;
+ }
+ mcast_sw_info->active_v4_flows++;
+- ds_put_format(&match, "eth.mcast && ip4 && ip4.dst == %s ",
++ ds_put_format(match, "eth.mcast && ip4 && ip4.dst == %s ",
+ igmp_group->mcgroup.name);
+ } else {
+ /* RFC 4291, section 2.7.1: Skip groups that correspond to all
+ * hosts.
+ */
+ if (ipv6_is_all_hosts(&igmp_group->address)) {
+- continue;
++ return;
+ }
+ if (mcast_sw_info->active_v6_flows >= mcast_sw_info->table_size) {
+- continue;
++ return;
+ }
+ mcast_sw_info->active_v6_flows++;
+- ds_put_format(&match, "eth.mcast && ip6 && ip6.dst == %s ",
++ ds_put_format(match, "eth.mcast && ip6 && ip6.dst == %s ",
+ igmp_group->mcgroup.name);
+ }
+
+ /* Also flood traffic to all multicast routers with relay enabled. */
+ if (mcast_sw_info->flood_relay) {
+- ds_put_cstr(&actions,
++ ds_put_cstr(actions,
+ "clone { "
+ "outport = \""MC_MROUTER_FLOOD "\"; "
+ "output; "
+ "};");
+ }
+ if (mcast_sw_info->flood_static) {
+- ds_put_cstr(&actions,
++ ds_put_cstr(actions,
+ "clone { "
+ "outport =\""MC_STATIC"\"; "
+ "output; "
+ "};");
+ }
+- ds_put_format(&actions, "outport = \"%s\"; output; ",
++ ds_put_format(actions, "outport = \"%s\"; output; ",
+ igmp_group->mcgroup.name);
+
+ ovn_lflow_add_unique(lflows, igmp_group->datapath, S_SWITCH_IN_L2_LKUP,
+- 90, ds_cstr(&match), ds_cstr(&actions));
++ 90, ds_cstr(match), ds_cstr(actions));
+ }
++}
+
+- /* Ingress table 19: Destination lookup, unicast handling (priority 50), */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbsp || lsp_is_external(op->nbsp)) {
+- continue;
+- }
++/* Ingress table 19: Destination lookup, unicast handling (priority 50). */
++static void
++build_lswitch_ip_unicast_lookup(struct ovn_port *op,
++ struct hmap *lflows,
++ struct hmap *mcgroups,
++ struct ds *actions,
++ struct ds *match)
++{
++ if (op->nbsp && (!lsp_is_external(op->nbsp))) {
+
+ /* For ports connected to logical routers add flows to bypass the
+ * broadcast flooding of ARP/ND requests in table 19. We direct the
+@@ -7279,15 +7603,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ struct eth_addr mac;
+ if (ovs_scan(op->nbsp->addresses[i],
+ ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
+- ds_clear(&match);
+- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
++ ds_clear(match);
++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(mac));
+
+- ds_clear(&actions);
+- ds_put_format(&actions, "outport = %s; output;", op->json_key);
++ ds_clear(actions);
++ ds_put_format(actions, "outport = %s; output;", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_L2_LKUP,
+- 50, ds_cstr(&match),
+- ds_cstr(&actions),
++ 50, ds_cstr(match),
++ ds_cstr(actions),
+ &op->nbsp->header_);
+ } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
+ if (lsp_is_enabled(op->nbsp)) {
+@@ -7300,15 +7624,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
+ continue;
+ }
+- ds_clear(&match);
+- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
++ ds_clear(match);
++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(mac));
+
+- ds_clear(&actions);
+- ds_put_format(&actions, "outport = %s; output;", op->json_key);
++ ds_clear(actions);
++ ds_put_format(actions, "outport = %s; output;", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_L2_LKUP,
+- 50, ds_cstr(&match),
+- ds_cstr(&actions),
++ 50, ds_cstr(match),
++ ds_cstr(actions),
+ &op->nbsp->header_);
+ } else if (!strcmp(op->nbsp->addresses[i], "router")) {
+ if (!op->peer || !op->peer->nbrp
+@@ -7316,8 +7640,8 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
+ continue;
+ }
+- ds_clear(&match);
+- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
++ ds_clear(match);
++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(mac));
+ if (op->peer->od->l3dgw_port
+ && op->peer->od->l3redirect_port
+@@ -7343,16 +7667,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ }
+
+ if (add_chassis_resident_check) {
+- ds_put_format(&match, " && is_chassis_resident(%s)",
++ ds_put_format(match, " && is_chassis_resident(%s)",
+ op->peer->od->l3redirect_port->json_key);
+ }
+ }
+
+- ds_clear(&actions);
+- ds_put_format(&actions, "outport = %s; output;", op->json_key);
++ ds_clear(actions);
++ ds_put_format(actions, "outport = %s; output;", op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_L2_LKUP, 50,
+- ds_cstr(&match), ds_cstr(&actions),
++ ds_cstr(match), ds_cstr(actions),
+ &op->nbsp->header_);
+
+ /* Add ethernet addresses specified in NAT rules on
+@@ -7366,19 +7690,19 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ && nat->logical_port && nat->external_mac
+ && eth_addr_from_string(nat->external_mac, &mac)) {
+
+- ds_clear(&match);
+- ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
++ ds_clear(match);
++ ds_put_format(match, "eth.dst == "ETH_ADDR_FMT
+ " && is_chassis_resident(\"%s\")",
+ ETH_ADDR_ARGS(mac),
+ nat->logical_port);
+
+- ds_clear(&actions);
+- ds_put_format(&actions, "outport = %s; output;",
++ ds_clear(actions);
++ ds_put_format(actions, "outport = %s; output;",
+ op->json_key);
+ ovn_lflow_add_with_hint(lflows, op->od,
+ S_SWITCH_IN_L2_LKUP, 50,
+- ds_cstr(&match),
+- ds_cstr(&actions),
++ ds_cstr(match),
++ ds_cstr(actions),
+ &op->nbsp->header_);
+ }
+ }
+@@ -7392,71 +7716,202 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ }
+ }
+ }
++}
+
+- /* Ingress table 19: Destination lookup for unknown MACs (priority 0). */
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbs) {
++struct bfd_entry {
++ struct hmap_node hmap_node;
++
++ const struct sbrec_bfd *sb_bt;
++
++ bool ref;
++};
++
++static struct bfd_entry *
++bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
++ const char *dst_ip)
++{
++ struct bfd_entry *bfd_e;
++ uint32_t hash;
++
++ hash = hash_string(dst_ip, 0);
++ hash = hash_string(logical_port, hash);
++ HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
++ if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
++ !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
++ return bfd_e;
++ }
++ }
++ return NULL;
++}
++
++static void
++bfd_cleanup_connections(struct northd_context *ctx, struct hmap *bfd_map)
++{
++ const struct nbrec_bfd *nb_bt;
++ struct bfd_entry *bfd_e;
++
++ NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
++ bfd_e = bfd_port_lookup(bfd_map, nb_bt->logical_port, nb_bt->dst_ip);
++ if (!bfd_e) {
+ continue;
+ }
+
+- if (od->has_unknown) {
+- ovn_lflow_add_unique(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
+- "outport = \""MC_UNKNOWN"\"; output;");
++ if (!bfd_e->ref && strcmp(nb_bt->status, "admin_down")) {
++ /* no user for this bfd connection */
++ nbrec_bfd_set_status(nb_bt, "admin_down");
+ }
+ }
+
+- build_lswitch_output_port_sec(ports, datapaths, lflows);
+-
+- ds_destroy(&match);
+- ds_destroy(&actions);
++ HMAP_FOR_EACH_POP (bfd_e, hmap_node, bfd_map) {
++ free(bfd_e);
++ }
+ }
+
+-/* Build pre-ACL and ACL tables for both ingress and egress.
+- * Ingress tables 3 through 10. Egress tables 0 through 7. */
++#define BFD_DEF_MINTX 1000 /* 1s */
++#define BFD_DEF_MINRX 1000 /* 1s */
++#define BFD_DEF_DETECT_MULT 5
++
+ static void
+-build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
+- struct hmap *port_groups,
+- struct hmap *lflows,
+- struct shash *meter_groups,
+- struct hmap *lbs)
++build_bfd_update_sb_conf(const struct nbrec_bfd *nb_bt,
++ const struct sbrec_bfd *sb_bt)
+ {
+- if (od->nbs) {
+- build_pre_acls(od, lflows);
+- build_pre_lb(od, lflows, meter_groups, lbs);
+- build_pre_stateful(od, lflows);
+- build_acl_hints(od, lflows);
+- build_acls(od, lflows, port_groups, meter_groups);
+- build_qos(od, lflows);
+- build_lb(od, lflows);
+- build_stateful(od, lflows, lbs);
+- build_lb_hairpin(od, lflows);
++ if (strcmp(nb_bt->dst_ip, sb_bt->dst_ip)) {
++ sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
++ }
++
++ if (strcmp(nb_bt->logical_port, sb_bt->logical_port)) {
++ sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
++ }
++
++ if (strcmp(nb_bt->status, sb_bt->status)) {
++ sbrec_bfd_set_status(sb_bt, nb_bt->status);
++ }
++
++ int detect_mult = nb_bt->n_detect_mult ? nb_bt->detect_mult[0]
++ : BFD_DEF_DETECT_MULT;
++ if (detect_mult != sb_bt->detect_mult) {
++ sbrec_bfd_set_detect_mult(sb_bt, detect_mult);
++ }
++
++ int min_tx = nb_bt->n_min_tx ? nb_bt->min_tx[0] : BFD_DEF_MINTX;
++ if (min_tx != sb_bt->min_tx) {
++ sbrec_bfd_set_min_tx(sb_bt, min_tx);
++ }
++
++ int min_rx = nb_bt->n_min_rx ? nb_bt->min_rx[0] : BFD_DEF_MINRX;
++ if (min_rx != sb_bt->min_rx) {
++ sbrec_bfd_set_min_rx(sb_bt, min_rx);
+ }
+ }
+
+-/* Logical switch ingress table 0: Admission control framework (priority
+- * 100). */
++/* RFC 5881 section 4
++ * The source port MUST be in the range 49152 through 65535.
++ * The same UDP source port number MUST be used for all BFD
++ * Control packets associated with a particular session.
++ * The source port number SHOULD be unique among all BFD
++ * sessions on the system
++ */
++#define BFD_UDP_SRC_PORT_START 49152
++#define BFD_UDP_SRC_PORT_LEN (65535 - BFD_UDP_SRC_PORT_START)
++
++static int bfd_get_unused_port(unsigned long *bfd_src_ports)
++{
++ int port;
++
++ port = bitmap_scan(bfd_src_ports, 0, 0, BFD_UDP_SRC_PORT_LEN);
++ if (port == BFD_UDP_SRC_PORT_LEN) {
++ return -ENOSPC;
++ }
++ bitmap_set1(bfd_src_ports, port);
++
++ return port + BFD_UDP_SRC_PORT_START;
++}
++
+ static void
+-build_lswitch_lflows_admission_control(struct ovn_datapath *od,
+- struct hmap *lflows)
++build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections,
++ struct hmap *ports)
+ {
+- if (od->nbs) {
+- /* Logical VLANs not supported. */
+- if (!is_vlan_transparent(od)) {
+- /* Block logical VLANs. */
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100,
+- "vlan.present", "drop;");
++ struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
++ const struct sbrec_bfd *sb_bt;
++ unsigned long *bfd_src_ports;
++ struct bfd_entry *bfd_e;
++ uint32_t hash;
++
++ bfd_src_ports = bitmap_allocate(BFD_UDP_SRC_PORT_LEN);
++
++ SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
++ bfd_e = xmalloc(sizeof *bfd_e);
++ bfd_e->sb_bt = sb_bt;
++ hash = hash_string(sb_bt->dst_ip, 0);
++ hash = hash_string(sb_bt->logical_port, hash);
++ hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
++ bitmap_set1(bfd_src_ports, sb_bt->src_port - BFD_UDP_SRC_PORT_START);
++ }
++
++ const struct nbrec_bfd *nb_bt;
++ NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
++ if (!nb_bt->status) {
++ /* default state is admin_down */
++ nbrec_bfd_set_status(nb_bt, "admin_down");
+ }
+
+- /* Broadcast/multicast source address is invalid. */
+- ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
+- "drop;");
++ bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
++ if (!bfd_e) {
++ int udp_src = bfd_get_unused_port(bfd_src_ports);
++ if (udp_src < 0) {
++ continue;
++ }
+
+- /* Port security flows have priority 50
+- * (see build_lswitch_input_port_sec()) and will continue
+- * to the next table if packet source is acceptable. */
++ sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
++ sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
++ sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
++ sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
++ sbrec_bfd_set_src_port(sb_bt, udp_src);
++ sbrec_bfd_set_status(sb_bt, nb_bt->status);
++
++ int min_tx = nb_bt->n_min_tx ? nb_bt->min_tx[0] : BFD_DEF_MINTX;
++ sbrec_bfd_set_min_tx(sb_bt, min_tx);
++ int min_rx = nb_bt->n_min_rx ? nb_bt->min_rx[0] : BFD_DEF_MINRX;
++ sbrec_bfd_set_min_rx(sb_bt, min_rx);
++ int d_mult = nb_bt->n_detect_mult ? nb_bt->detect_mult[0]
++ : BFD_DEF_DETECT_MULT;
++ sbrec_bfd_set_detect_mult(sb_bt, d_mult);
++ } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
++ if (!strcmp(nb_bt->status, "admin_down") ||
++ !strcmp(bfd_e->sb_bt->status, "admin_down")) {
++ sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
++ } else {
++ nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
++ }
++ }
++ if (bfd_e) {
++ build_bfd_update_sb_conf(nb_bt, bfd_e->sb_bt);
++
++ hmap_remove(&sb_only, &bfd_e->hmap_node);
++ bfd_e->ref = false;
++ hash = hash_string(bfd_e->sb_bt->dst_ip, 0);
++ hash = hash_string(bfd_e->sb_bt->logical_port, hash);
++ hmap_insert(bfd_connections, &bfd_e->hmap_node, hash);
++ }
++
++ struct ovn_port *op = ovn_port_find(ports, nb_bt->logical_port);
++ if (op) {
++ op->has_bfd = true;
++ }
+ }
+-}
+
++ HMAP_FOR_EACH_POP (bfd_e, hmap_node, &sb_only) {
++ struct ovn_port *op = ovn_port_find(ports, bfd_e->sb_bt->logical_port);
++ if (op) {
++ op->has_bfd = false;
++ }
++ sbrec_bfd_delete(bfd_e->sb_bt);
++ free(bfd_e);
++ }
++ hmap_destroy(&sb_only);
++
++ bitmap_free(bfd_src_ports);
++}
+
+ /* Returns a string of the IP address of the router port 'op' that
+ * overlaps with 'ip_s". If one is not found, returns NULL.
+@@ -7549,33 +8004,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
+ struct ds actions = DS_EMPTY_INITIALIZER;
+
+ if (!strcmp(rule->action, "reroute")) {
++ ovs_assert(rule->n_nexthops <= 1);
++
++ char *nexthop =
++ (rule->n_nexthops == 1 ? rule->nexthops[0] : rule->nexthop);
+ struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
+- od, ports, rule->priority, rule->nexthop);
++ od, ports, rule->priority, nexthop);
+ if (!out_port) {
+ return;
+ }
+
+- const char *lrp_addr_s = find_lrp_member_ip(out_port, rule->nexthop);
++ const char *lrp_addr_s = find_lrp_member_ip(out_port, nexthop);
+ if (!lrp_addr_s) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy "
+ " priority %"PRId64" nexthop %s",
+- rule->priority, rule->nexthop);
++ rule->priority, nexthop);
+ return;
+ }
+ uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
+ if (pkt_mark) {
+ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
+ }
+- bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false;
++
++ bool is_ipv4 = strchr(nexthop, '.') ? true : false;
+ ds_put_format(&actions, "%s = %s; "
+ "%s = %s; "
+ "eth.src = %s; "
+ "outport = %s; "
+ "flags.loopback = 1; "
++ REG_ECMP_GROUP_ID" = 0; "
+ "next;",
+ is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
+- rule->nexthop,
++ nexthop,
+ is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
+ lrp_addr_s,
+ out_port->lrp_networks.ea_s,
+@@ -7588,7 +8049,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
+ if (pkt_mark) {
+ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
+ }
+- ds_put_cstr(&actions, "next;");
++ ds_put_cstr(&actions, REG_ECMP_GROUP_ID" = 0; next;");
+ }
+ ds_put_format(&match, "%s", rule->match);
+
+@@ -7598,15 +8059,116 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
+ ds_destroy(&actions);
+ }
+
+-struct parsed_route {
+- struct ovs_list list_node;
+- struct in6_addr prefix;
+- unsigned int plen;
+- bool is_src_route;
+- uint32_t hash;
+- const struct nbrec_logical_router_static_route *route;
+- bool ecmp_symmetric_reply;
+-};
++static void
++build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
++ struct hmap *ports,
++ const struct nbrec_logical_router_policy *rule,
++ uint16_t ecmp_group_id)
++{
++ ovs_assert(rule->n_nexthops > 1);
++
++ bool nexthops_is_ipv4 = true;
++
++ /* Check that all the nexthops belong to the same addr family before
++ * adding logical flows. */
++ for (uint16_t i = 0; i < rule->n_nexthops; i++) {
++ bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false;
++
++ if (i == 0) {
++ nexthops_is_ipv4 = is_ipv4;
++ }
++
++ if (is_ipv4 != nexthops_is_ipv4) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "nexthop [%s] of the router policy with "
++ "the match [%s] do not belong to the same address "
++ "family as other next hops",
++ rule->nexthops[i], rule->match);
++ return;
++ }
++ }
++
++ struct ds match = DS_EMPTY_INITIALIZER;
++ struct ds actions = DS_EMPTY_INITIALIZER;
++
++ for (size_t i = 0; i < rule->n_nexthops; i++) {
++ struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
++ od, ports, rule->priority, rule->nexthops[i]);
++ if (!out_port) {
++ goto cleanup;
++ }
++
++ const char *lrp_addr_s =
++ find_lrp_member_ip(out_port, rule->nexthops[i]);
++ if (!lrp_addr_s) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy "
++ " priority %"PRId64" nexthop %s",
++ rule->priority, rule->nexthops[i]);
++ goto cleanup;
++ }
++
++ ds_clear(&actions);
++ uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
++ if (pkt_mark) {
++ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
++ }
++
++ bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false;
++
++ ds_put_format(&actions, "%s = %s; "
++ "%s = %s; "
++ "eth.src = %s; "
++ "outport = %s; "
++ "flags.loopback = 1; "
++ "next;",
++ is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
++ rule->nexthops[i],
++ is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
++ lrp_addr_s,
++ out_port->lrp_networks.ea_s,
++ out_port->json_key);
++
++ ds_clear(&match);
++ ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && "
++ REG_ECMP_MEMBER_ID" == %"PRIuSIZE,
++ ecmp_group_id, i + 1);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY_ECMP,
++ 100, ds_cstr(&match),
++ ds_cstr(&actions), &rule->header_);
++ }
++
++ ds_clear(&actions);
++ ds_put_format(&actions, "%s = %"PRIu16
++ "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
++ REG_ECMP_MEMBER_ID);
++
++ for (size_t i = 0; i < rule->n_nexthops; i++) {
++ if (i > 0) {
++ ds_put_cstr(&actions, ", ");
++ }
++
++ ds_put_format(&actions, "%"PRIuSIZE, i + 1);
++ }
++ ds_put_cstr(&actions, ");");
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
++ rule->priority, rule->match,
++ ds_cstr(&actions), &rule->header_);
++
++cleanup:
++ ds_destroy(&match);
++ ds_destroy(&actions);
++}
++
++struct parsed_route {
++ struct ovs_list list_node;
++ struct in6_addr prefix;
++ unsigned int plen;
++ bool is_src_route;
++ uint32_t hash;
++ const struct nbrec_logical_router_static_route *route;
++ bool ecmp_symmetric_reply;
++};
+
+ static uint32_t
+ route_hash(struct parsed_route *route)
+@@ -7619,7 +8181,8 @@ route_hash(struct parsed_route *route)
+ * Otherwise return NULL. */
+ static struct parsed_route *
+ parsed_routes_add(struct ovs_list *routes,
+- const struct nbrec_logical_router_static_route *route)
++ const struct nbrec_logical_router_static_route *route,
++ struct hmap *bfd_connections)
+ {
+ /* Verify that the next hop is an IP address with an all-ones mask. */
+ struct in6_addr nexthop;
+@@ -7660,6 +8223,25 @@ parsed_routes_add(struct ovs_list *routes,
+ return NULL;
+ }
+
++ const struct nbrec_bfd *nb_bt = route->bfd;
++ if (nb_bt && !strcmp(nb_bt->dst_ip, route->nexthop)) {
++ struct bfd_entry *bfd_e;
++
++ bfd_e = bfd_port_lookup(bfd_connections, nb_bt->logical_port,
++ nb_bt->dst_ip);
++ if (bfd_e) {
++ bfd_e->ref = true;
++ }
++
++ if (!strcmp(nb_bt->status, "admin_down")) {
++ nbrec_bfd_set_status(nb_bt, "down");
++ }
++
++ if (!strcmp(nb_bt->status, "down")) {
++ return NULL;
++ }
++ }
++
+ struct parsed_route *pr = xzalloc(sizeof *pr);
+ pr->prefix = prefix;
+ pr->plen = plen;
+@@ -8102,16 +8684,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
+ build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
+ &match, &priority);
+
+- struct ds actions = DS_EMPTY_INITIALIZER;
+- ds_put_format(&actions, "ip.ttl--; "REG_ECMP_GROUP_ID" = 0; %s = ",
++ struct ds common_actions = DS_EMPTY_INITIALIZER;
++ ds_put_format(&common_actions, REG_ECMP_GROUP_ID" = 0; %s = ",
+ is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6);
+-
+ if (gateway) {
+- ds_put_cstr(&actions, gateway);
++ ds_put_cstr(&common_actions, gateway);
+ } else {
+- ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
++ ds_put_format(&common_actions, "ip%s.dst", is_ipv4 ? "4" : "6");
+ }
+- ds_put_format(&actions, "; "
++ ds_put_format(&common_actions, "; "
+ "%s = %s; "
+ "eth.src = %s; "
+ "outport = %s; "
+@@ -8121,11 +8702,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
+ lrp_addr_s,
+ op->lrp_networks.ea_s,
+ op->json_key);
++ struct ds actions = DS_EMPTY_INITIALIZER;
++ ds_put_format(&actions, "ip.ttl--; %s", ds_cstr(&common_actions));
+
+ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
+ ds_cstr(&match), ds_cstr(&actions),
+ stage_hint);
++ if (op->has_bfd) {
++ ds_put_format(&match, " && udp.dst == 3784");
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING,
++ priority + 1, ds_cstr(&match),
++ ds_cstr(&common_actions), stage_hint);
++ }
+ ds_destroy(&match);
++ ds_destroy(&common_actions);
+ ds_destroy(&actions);
+ }
+
+@@ -8203,15 +8793,10 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
+ return false;
+ }
+
+- if (!extract_ip_addresses(addresses, laddrs) ||
+- laddrs->n_ipv4_addrs > 1 ||
+- laddrs->n_ipv6_addrs > 1 ||
+- (laddrs->n_ipv4_addrs && laddrs->ipv4_addrs[0].plen != 32) ||
+- (laddrs->n_ipv6_addrs && laddrs->ipv6_addrs[0].plen != 128)) {
++ if (!extract_ip_address(addresses, laddrs)) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
+ addresses, UUID_ARGS(&od->key));
+- destroy_lport_addresses(laddrs);
+ return false;
+ }
+
+@@ -8221,7 +8806,7 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
+ static void
+ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+ struct ds *match, struct ds *actions, int priority,
+- bool lb_force_snat_ip, struct ovn_lb_vip *lb_vip,
++ bool force_snat_for_lb, struct ovn_lb_vip *lb_vip,
+ const char *proto, struct nbrec_load_balancer *lb,
+ struct shash *meter_groups, struct sset *nat_entries)
+ {
+@@ -8230,7 +8815,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+
+ /* A match and actions for new connections. */
+ char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
+- if (lb_force_snat_ip) {
++ if (force_snat_for_lb) {
+ char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
+ ds_cstr(actions));
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority,
+@@ -8243,7 +8828,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+
+ /* A match and actions for established connections. */
+ char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
+- if (lb_force_snat_ip) {
++ if (force_snat_for_lb) {
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, priority,
+ est_match,
+ "flags.force_snat_for_lb = 1; ct_dnat;",
+@@ -8320,7 +8905,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+ ds_put_format(&undnat_match, ") && outport == %s && "
+ "is_chassis_resident(%s)", od->l3dgw_port->json_key,
+ od->l3redirect_port->json_key);
+- if (lb_force_snat_ip) {
++ if (force_snat_for_lb) {
+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
+ ds_cstr(&undnat_match),
+ "flags.force_snat_for_lb = 1; ct_dnat;",
+@@ -8788,2375 +9373,2531 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
+ }
+
+ static void
+-build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+- struct hmap *lflows, struct shash *meter_groups,
+- struct hmap *lbs)
++build_lrouter_force_snat_flows_op(struct ovn_port *op,
++ struct hmap *lflows,
++ struct ds *match, struct ds *actions)
+ {
+- /* This flow table structure is documented in ovn-northd(8), so please
+- * update ovn-northd.8.xml if you change anything. */
++ if (!op->nbrp || !op->peer || !op->od->lb_force_snat_router_ip) {
++ return;
++ }
+
+- struct ds match = DS_EMPTY_INITIALIZER;
+- struct ds actions = DS_EMPTY_INITIALIZER;
++ if (op->lrp_networks.n_ipv4_addrs) {
++ ds_clear(match);
++ ds_clear(actions);
+
+- struct ovn_datapath *od;
+- struct ovn_port *op;
++ ds_put_format(match, "inport == %s && ip4.dst == %s",
++ op->json_key, op->lrp_networks.ipv4_addrs[0].addr_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_UNSNAT, 110,
++ ds_cstr(match), "ct_snat;");
+
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbr) {
+- continue;
+- }
++ ds_clear(match);
+
+- /* Priority-90-92 flows handle ARP requests and ND packets. Most are
+- * per logical port but DNAT addresses can be handled per datapath
+- * for non gateway router ports.
+- *
+- * Priority 91 and 92 flows are added for each gateway router
+- * port to handle the special cases. In case we get the packet
+- * on a regular port, just reply with the port's ETH address.
+- */
+- for (int i = 0; i < od->nbr->n_nat; i++) {
+- struct ovn_nat *nat_entry = &od->nat_entries[i];
++ /* Higher priority rules to force SNAT with the router port ip.
++ * This only takes effect when the packet has already been
++ * load balanced once. */
++ ds_put_format(match, "flags.force_snat_for_lb == 1 && ip4 && "
++ "outport == %s", op->json_key);
++ ds_put_format(actions, "ct_snat(%s);",
++ op->lrp_networks.ipv4_addrs[0].addr_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_SNAT, 110,
++ ds_cstr(match), ds_cstr(actions));
++ if (op->lrp_networks.n_ipv4_addrs > 2) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
++ VLOG_WARN_RL(&rl, "Logical router port %s is configured with "
++ "multiple IPv4 addresses. Only the first "
++ "IP [%s] is considered as SNAT for load "
++ "balancer", op->json_key,
++ op->lrp_networks.ipv4_addrs[0].addr_s);
++ }
++ }
++
++ /* op->lrp_networks.ipv6_addrs will always have LLA and that will be
++ * last in the list. So add the flows only if n_ipv6_addrs > 1. */
++ if (op->lrp_networks.n_ipv6_addrs > 1) {
++ ds_clear(match);
++ ds_clear(actions);
+
+- /* Skip entries we failed to parse. */
+- if (!nat_entry_is_valid(nat_entry)) {
+- continue;
+- }
++ ds_put_format(match, "inport == %s && ip6.dst == %s",
++ op->json_key, op->lrp_networks.ipv6_addrs[0].addr_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_UNSNAT, 110,
++ ds_cstr(match), "ct_snat;");
+
+- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+- * below.
+- */
+- if (!strcmp(nat_entry->nb->type, "snat")) {
+- continue;
+- }
+- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
++ ds_clear(match);
++
++ /* Higher priority rules to force SNAT with the router port ip.
++ * This only takes effect when the packet has already been
++ * load balanced once. */
++ ds_put_format(match, "flags.force_snat_for_lb == 1 && ip6 && "
++ "outport == %s", op->json_key);
++ ds_put_format(actions, "ct_snat(%s);",
++ op->lrp_networks.ipv6_addrs[0].addr_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_SNAT, 110,
++ ds_cstr(match), ds_cstr(actions));
++ if (op->lrp_networks.n_ipv6_addrs > 2) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
++ VLOG_WARN_RL(&rl, "Logical router port %s is configured with "
++ "multiple IPv6 addresses. Only the first "
++ "IP [%s] is considered as SNAT for load "
++ "balancer", op->json_key,
++ op->lrp_networks.ipv6_addrs[0].addr_s);
+ }
++ }
++}
+
+- /* Now handle SNAT entries too, one per unique SNAT IP. */
+- struct shash_node *snat_snode;
+- SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
+- struct ovn_snat_ip *snat_ip = snat_snode->data;
++static void
++build_lrouter_bfd_flows(struct hmap *lflows, struct ovn_port *op)
++{
++ if (!op->has_bfd) {
++ return;
++ }
+
+- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+- continue;
+- }
++ struct ds ip_list = DS_EMPTY_INITIALIZER;
++ struct ds match = DS_EMPTY_INITIALIZER;
+
+- struct ovn_nat *nat_entry =
+- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+- struct ovn_nat, ext_addr_list_node);
+- build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+- }
++ if (op->lrp_networks.n_ipv4_addrs) {
++ op_put_v4_networks(&ip_list, op, false);
++ ds_put_format(&match, "ip4.src == %s && udp.dst == 3784",
++ ds_cstr(&ip_list));
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110,
++ ds_cstr(&match), "next; ",
++ &op->nbrp->header_);
++ ds_clear(&match);
++ ds_put_format(&match, "ip4.dst == %s && udp.dst == 3784",
++ ds_cstr(&ip_list));
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110,
++ ds_cstr(&match), "handle_bfd_msg(); ",
++ &op->nbrp->header_);
+ }
++ if (op->lrp_networks.n_ipv6_addrs) {
++ ds_clear(&ip_list);
++ ds_clear(&match);
+
+- /* Logical router ingress table 3: IP Input for IPv4. */
+- HMAP_FOR_EACH (op, key_node, ports) {
+- if (!op->nbrp) {
+- continue;
++ op_put_v6_networks(&ip_list, op);
++ ds_put_format(&match, "ip6.src == %s && udp.dst == 3784",
++ ds_cstr(&ip_list));
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110,
++ ds_cstr(&match), "next; ",
++ &op->nbrp->header_);
++ ds_clear(&match);
++ ds_put_format(&match, "ip6.dst == %s && udp.dst == 3784",
++ ds_cstr(&ip_list));
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110,
++ ds_cstr(&match), "handle_bfd_msg(); ",
++ &op->nbrp->header_);
++ }
++
++ ds_destroy(&ip_list);
++ ds_destroy(&match);
++}
++
++/* Logical router ingress Table 0: L2 Admission Control
++ * Generic admission control flows (without inport check).
++ */
++static void
++build_adm_ctrl_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows)
++{
++ if (od->nbr) {
++ /* Logical VLANs not supported.
++ * Broadcast/multicast source address is invalid. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
++ "vlan.present || eth.src[40]", "drop;");
++ }
++}
++
++/* Logical router ingress Table 0: L2 Admission Control
++ * This table drops packets that the router shouldn't see at all based
++ * on their Ethernet headers.
++ */
++static void
++build_adm_ctrl_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (op->nbrp) {
++ if (!lrport_is_enabled(op->nbrp)) {
++ /* Drop packets from disabled logical ports (since logical flow
++ * tables are default-drop). */
++ return;
+ }
+
+ if (op->derived) {
+- /* No ingress packets are accepted on a chassisredirect
+- * port, so no need to program flows for that port. */
+- continue;
++ /* No ingress packets should be received on a chassisredirect
++ * port. */
++ return;
+ }
+
+- if (op->lrp_networks.n_ipv4_addrs) {
+- /* L3 admission control: drop packets that originate from an
+- * IPv4 address owned by the router or a broadcast address
+- * known to the router (priority 100). */
+- ds_clear(&match);
+- ds_put_cstr(&match, "ip4.src == ");
+- op_put_v4_networks(&match, op, true);
+- ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+- ds_cstr(&match), "drop;",
+- &op->nbrp->header_);
++ /* Store the ethernet address of the port receiving the packet.
++ * This will save us from having to match on inport further down in
++ * the pipeline.
++ */
++ ds_clear(actions);
++ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
++ op->lrp_networks.ea_s);
+
+- /* ICMP echo reply. These flows reply to ICMP echo requests
+- * received for the router's IP address. Since packets only
+- * get here as part of the logical router datapath, the inport
+- * (i.e. the incoming locally attached net) does not matter.
+- * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
+- ds_clear(&match);
+- ds_put_cstr(&match, "ip4.dst == ");
+- op_put_v4_networks(&match, op, false);
+- ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
++ ds_clear(match);
++ ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
+
+- const char * icmp_actions = "ip4.dst <-> ip4.src; "
+- "ip.ttl = 255; "
+- "icmp4.type = 0; "
+- "flags.loopback = 1; "
+- "next; ";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+- ds_cstr(&match), icmp_actions,
+- &op->nbrp->header_);
++ ds_clear(match);
++ ds_put_format(match, "eth.dst == %s && inport == %s",
++ op->lrp_networks.ea_s, op->json_key);
++ if (op->od->l3dgw_port && op == op->od->l3dgw_port
++ && op->od->l3redirect_port) {
++ /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
++ * should only be received on the gateway chassis. */
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
+ }
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ }
++}
+
+- /* ICMP time exceeded */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- ds_clear(&match);
+- ds_clear(&actions);
+
+- ds_put_format(&match,
+- "inport == %s && ip4 && "
+- "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
+- ds_put_format(&actions,
+- "icmp4 {"
+- "eth.dst <-> eth.src; "
+- "icmp4.type = 11; /* Time exceeded */ "
+- "icmp4.code = 0; /* TTL exceeded in transit */ "
+- "ip4.dst = ip4.src; "
+- "ip4.src = %s; "
+- "ip.ttl = 255; "
+- "next; };",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+- ds_cstr(&match), ds_cstr(&actions),
+- &op->nbrp->header_);
+- }
++/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
++ * lflows for logical routers. */
++static void
++build_neigh_learning_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (od->nbr) {
+
+- /* ARP reply. These flows reply to ARP requests for the router's own
+- * IP address. */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- ds_clear(&match);
+- ds_put_format(&match, "arp.spa == %s/%u",
+- op->lrp_networks.ipv4_addrs[i].network_s,
+- op->lrp_networks.ipv4_addrs[i].plen);
++ /* Learn MAC bindings from ARP/IPv6 ND.
++ *
++ * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
++ * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
++ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
++ * If "always_learn_from_arp_request" is set to false, it will also
++ * lookup for the (arp.spa) in the mac binding table using the
++ * "lookup_arp_ip" action for ARP request packets, and stores the
++ * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
++ * to "1" directly for ARP response packets.
++ *
++ * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
++ * for the (nd.target, nd.tll) in the mac binding table using the
++ * 'lookup_nd' action and stores the result in
++ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
++ * "always_learn_from_arp_request" is set to false,
++ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
++ *
++ * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
++ * for the (ip6.src, nd.sll) in the mac binding table using the
++ * 'lookup_nd' action and stores the result in
++ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
++ * "always_learn_from_arp_request" is set to false, it will also lookup
++ * for the (ip6.src) in the mac binding table using the "lookup_nd_ip"
++ * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
++ * bit.
++ *
++ * Table LEARN_NEIGHBOR learns the mac-binding using the action
++ * - 'put_arp/put_nd'. Learning mac-binding is skipped if
++ * REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or
++ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
++ *
++ * */
+
+- if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
+- && op->peer->od->n_localnet_ports) {
+- bool add_chassis_resident_check = false;
+- if (op == op->od->l3dgw_port) {
+- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+- * should only be sent from the gateway chassis, so that
+- * upstream MAC learning points to the gateway chassis.
+- * Also need to avoid generation of multiple ARP responses
+- * from different chassis. */
+- add_chassis_resident_check = true;
+- } else {
+- /* Check if the option 'reside-on-redirect-chassis'
+- * is set to true on the router port. If set to true
+- * and if peer's logical switch has a localnet port, it
+- * means the router pipeline for the packets from
+- * peer's logical switch is be run on the chassis
+- * hosting the gateway port and it should reply to the
+- * ARP requests for the router port IPs.
+- */
+- add_chassis_resident_check = smap_get_bool(
+- &op->nbrp->options,
+- "reside-on-redirect-chassis", false);
+- }
++ /* Flows for LOOKUP_NEIGHBOR. */
++ bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
++ "always_learn_from_arp_request", true);
++ ds_clear(actions);
++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
++ " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
++ learn_from_arp_request ? "" :
++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
++ "arp.op == 2", ds_cstr(actions));
+
+- if (add_chassis_resident_check) {
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
+- }
++ ds_clear(actions);
++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
++ " = lookup_nd(inport, nd.target, nd.tll); %snext;",
++ learn_from_arp_request ? "" :
++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
++ ds_cstr(actions));
+
+- build_lrouter_arp_flow(op->od, op,
+- op->lrp_networks.ipv4_addrs[i].addr_s,
+- REG_INPORT_ETH_ADDR, &match, false, 90,
+- &op->nbrp->header_, lflows);
+- }
++ ds_clear(actions);
++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
++ " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
++ learn_from_arp_request ? "" :
++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
++ " = lookup_nd_ip(inport, ip6.src); ");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
++ ds_cstr(actions));
+
+- /* A set to hold all load-balancer vips that need ARP responses. */
+- struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
+- struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+- get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
++ /* For other packet types, we can skip neighbor learning.
++ * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
++ REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
+
+- const char *ip_address;
+- SSET_FOR_EACH (ip_address, &all_ips_v4) {
+- ds_clear(&match);
+- if (op == op->od->l3dgw_port) {
+- ds_put_format(&match, "is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
++ /* Flows for LEARN_NEIGHBOR. */
++ /* Skip Neighbor learning if not required. */
++ ds_clear(match);
++ ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s",
++ learn_from_arp_request ? "" :
++ " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
++ ds_cstr(match), "next;");
+
+- build_lrouter_arp_flow(op->od, op,
+- ip_address, REG_INPORT_ETH_ADDR,
+- &match, false, 90, NULL, lflows);
+- }
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
++ "arp", "put_arp(inport, arp.spa, arp.sha); next;");
+
+- SSET_FOR_EACH (ip_address, &all_ips_v6) {
+- ds_clear(&match);
+- if (op == op->od->l3dgw_port) {
+- ds_put_format(&match, "is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
++ "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
+
+- build_lrouter_nd_flow(op->od, op, "nd_na",
+- ip_address, NULL, REG_INPORT_ETH_ADDR,
+- &match, false, 90, NULL, lflows);
+- }
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
++ "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
++ }
+
+- sset_destroy(&all_ips_v4);
+- sset_destroy(&all_ips_v6);
++}
+
+- if (!smap_get(&op->od->nbr->options, "chassis")
+- && !op->od->l3dgw_port) {
+- /* UDP/TCP port unreachable. */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- ds_clear(&match);
+- ds_put_format(&match,
+- "ip4 && ip4.dst == %s && !ip.later_frag && udp",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- const char *action = "icmp4 {"
+- "eth.dst <-> eth.src; "
+- "ip4.dst <-> ip4.src; "
+- "ip.ttl = 255; "
+- "icmp4.type = 3; "
+- "icmp4.code = 3; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 80, ds_cstr(&match), action,
+- &op->nbrp->header_);
++/* Logical router ingress Table 1: Neighbor lookup lflows
++ * for logical router ports. */
++static void
++build_neigh_learning_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (op->nbrp) {
+
+- ds_clear(&match);
+- ds_put_format(&match,
+- "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- action = "tcp_reset {"
+- "eth.dst <-> eth.src; "
+- "ip4.dst <-> ip4.src; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 80, ds_cstr(&match), action,
+- &op->nbrp->header_);
++ bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
++ "always_learn_from_arp_request", true);
+
+- ds_clear(&match);
+- ds_put_format(&match,
+- "ip4 && ip4.dst == %s && !ip.later_frag",
++ /* Check if we need to learn mac-binding from ARP requests. */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ if (!learn_from_arp_request) {
++ /* ARP request to this address should always get learned,
++ * so add a priority-110 flow to set
++ * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
++ ds_clear(match);
++ ds_put_format(match,
++ "inport == %s && arp.spa == %s/%u && "
++ "arp.tpa == %s && arp.op == 1",
++ op->json_key,
++ op->lrp_networks.ipv4_addrs[i].network_s,
++ op->lrp_networks.ipv4_addrs[i].plen,
+ op->lrp_networks.ipv4_addrs[i].addr_s);
+- action = "icmp4 {"
+- "eth.dst <-> eth.src; "
+- "ip4.dst <-> ip4.src; "
+- "ip.ttl = 255; "
+- "icmp4.type = 3; "
+- "icmp4.code = 2; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 70, ds_cstr(&match), action,
++ if (op->od->l3dgw_port && op == op->od->l3dgw_port
++ && op->od->l3redirect_port) {
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
++ }
++ const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
++ " = lookup_arp(inport, arp.spa, arp.sha); "
++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
++ " next;";
++ ovn_lflow_add_with_hint(lflows, op->od,
++ S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
++ ds_cstr(match), actions_s,
+ &op->nbrp->header_);
+ }
++ ds_clear(match);
++ ds_put_format(match,
++ "inport == %s && arp.spa == %s/%u && arp.op == 1",
++ op->json_key,
++ op->lrp_networks.ipv4_addrs[i].network_s,
++ op->lrp_networks.ipv4_addrs[i].plen);
++ if (op->od->l3dgw_port && op == op->od->l3dgw_port
++ && op->od->l3redirect_port) {
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
++ }
++ ds_clear(actions);
++ ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
++ " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
++ learn_from_arp_request ? "" :
++ REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
++ " = lookup_arp_ip(inport, arp.spa); ");
++ ovn_lflow_add_with_hint(lflows, op->od,
++ S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
+ }
++ }
++}
+
+- /* Drop IP traffic destined to router owned IPs except if the IP is
+- * also a SNAT IP. Those are dropped later, in stage
+- * "lr_in_arp_resolve", if unSNAT was unsuccessful.
+- *
+- * Priority 60.
+- */
+- build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
+- lflows);
+-
+- /* ARP / ND handling for external IP addresses.
+- *
+- * DNAT and SNAT IP addresses are external IP addresses that need ARP
+- * handling.
+- *
+- * These are already taken care globally, per router. The only
+- * exception is on the l3dgw_port where we might need to use a
+- * different ETH address.
+- */
+- if (op != op->od->l3dgw_port) {
+- continue;
+- }
++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
++ * Adv (RA) options and response. */
++static void
++build_ND_RA_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (!op->nbrp || op->nbrp->peer || !op->peer) {
++ return;
++ }
+
+- for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+- struct ovn_nat *nat_entry = &op->od->nat_entries[i];
++ if (!op->lrp_networks.n_ipv6_addrs) {
++ return;
++ }
+
+- /* Skip entries we failed to parse. */
+- if (!nat_entry_is_valid(nat_entry)) {
+- continue;
+- }
++ struct smap options;
++ smap_clone(&options, &op->sb->options);
+
+- /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+- * below.
+- */
+- if (!strcmp(nat_entry->nb->type, "snat")) {
+- continue;
+- }
+- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+- }
++ /* enable IPv6 prefix delegation */
++ bool prefix_delegation = smap_get_bool(&op->nbrp->options,
++ "prefix_delegation", false);
++ if (!lrport_is_enabled(op->nbrp)) {
++ prefix_delegation = false;
++ }
++ smap_add(&options, "ipv6_prefix_delegation",
++ prefix_delegation ? "true" : "false");
+
+- /* Now handle SNAT entries too, one per unique SNAT IP. */
+- struct shash_node *snat_snode;
+- SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
+- struct ovn_snat_ip *snat_ip = snat_snode->data;
++ bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
++ "prefix", false);
++ if (!lrport_is_enabled(op->nbrp)) {
++ ipv6_prefix = false;
++ }
++ smap_add(&options, "ipv6_prefix",
++ ipv6_prefix ? "true" : "false");
++ sbrec_port_binding_set_options(op->sb, &options);
+
+- if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+- continue;
+- }
++ smap_destroy(&options);
+
+- struct ovn_nat *nat_entry =
+- CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+- struct ovn_nat, ext_addr_list_node);
+- build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+- }
++ const char *address_mode = smap_get(
++ &op->nbrp->ipv6_ra_configs, "address_mode");
++
++ if (!address_mode) {
++ return;
++ }
++ if (strcmp(address_mode, "slaac") &&
++ strcmp(address_mode, "dhcpv6_stateful") &&
++ strcmp(address_mode, "dhcpv6_stateless")) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
++ VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
++ address_mode);
++ return;
+ }
+
+- /* NAT, Defrag and load balancing. */
+- HMAP_FOR_EACH (od, key_node, datapaths) {
+- if (!od->nbr) {
+- continue;
+- }
++ if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
++ false)) {
++ copy_ra_to_sb(op, address_mode);
++ }
+
+- /* Packets are allowed by default. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
++ ds_clear(match);
++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
++ op->json_key);
++ ds_clear(actions);
+
+- /* Send the IPv6 NS packets to next table. When ovn-controller
+- * generates IPv6 NS (for the action - nd_ns{}), the injected
+- * packet would go through conntrack - which is not required. */
+- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
++ const char *mtu_s = smap_get(
++ &op->nbrp->ipv6_ra_configs, "mtu");
+
+- /* NAT rules are only valid on Gateway routers and routers with
+- * l3dgw_port (router has a port with gateway chassis
+- * specified). */
+- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+- continue;
+- }
++ /* As per RFC 2460, 1280 is the minimum IPv6 MTU. */
++ uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
+
+- struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
++ ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
++ "addr_mode = \"%s\", slla = %s",
++ address_mode, op->lrp_networks.ea_s);
++ if (mtu > 0) {
++ ds_put_format(actions, ", mtu = %u", mtu);
++ }
+
+- bool dnat_force_snat_ip =
+- !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
+- bool lb_force_snat_ip =
+- !lport_addresses_is_empty(&od->lb_force_snat_addrs);
++ const char *prf = smap_get_def(
++ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
++ if (strcmp(prf, "MEDIUM")) {
++ ds_put_format(actions, ", router_preference = \"%s\"", prf);
++ }
+
+- for (int i = 0; i < od->nbr->n_nat; i++) {
+- const struct nbrec_nat *nat;
++ bool add_rs_response_flow = false;
+
+- nat = od->nbr->nat[i];
++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
++ continue;
++ }
+
+- ovs_be32 ip, mask;
+- struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
+- bool is_v6 = false;
+- bool stateless = lrouter_nat_is_stateless(nat);
+- struct nbrec_address_set *allowed_ext_ips =
+- nat->allowed_ext_ips;
+- struct nbrec_address_set *exempted_ext_ips =
+- nat->exempted_ext_ips;
++ ds_put_format(actions, ", prefix = %s/%u",
++ op->lrp_networks.ipv6_addrs[i].network_s,
++ op->lrp_networks.ipv6_addrs[i].plen);
+
+- if (allowed_ext_ips && exempted_ext_ips) {
+- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+- VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
+- "both allowed and exempt external ips set",
+- UUID_ARGS(&(nat->header_.uuid)));
+- continue;
+- }
++ add_rs_response_flow = true;
++ }
+
+- char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
+- if (error || mask != OVS_BE32_MAX) {
+- free(error);
+- error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
+- if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
+- /* Invalid for both IPv4 and IPv6 */
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad external ip %s for nat",
+- nat->external_ip);
+- free(error);
+- continue;
+- }
+- /* It was an invalid IPv4 address, but valid IPv6.
+- * Treat the rest of the handling of this NAT rule
+- * as IPv6. */
+- is_v6 = true;
+- }
++ if (add_rs_response_flow) {
++ ds_put_cstr(actions, "); next;");
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
++ 50, ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ ds_clear(actions);
++ ds_clear(match);
++ ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
++ "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+
+- /* Check the validity of nat->logical_ip. 'logical_ip' can
+- * be a subnet when the type is "snat". */
+- int cidr_bits;
+- if (is_v6) {
+- error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
+- cidr_bits = ipv6_count_cidr_bits(&mask_v6);
+- } else {
+- error = ip_parse_masked(nat->logical_ip, &ip, &mask);
+- cidr_bits = ip_count_cidr_bits(mask);
+- }
+- if (!strcmp(nat->type, "snat")) {
+- if (error) {
+- /* Invalid for both IPv4 and IPv6 */
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
+- "in router "UUID_FMT"",
+- nat->logical_ip, UUID_ARGS(&od->key));
+- free(error);
+- continue;
+- }
+- } else {
+- if (error || (!is_v6 && mask != OVS_BE32_MAX)
+- || (is_v6 && memcmp(&mask_v6, &v6_exact,
+- sizeof mask_v6))) {
+- /* Invalid for both IPv4 and IPv6 */
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
+- ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
+- free(error);
+- continue;
+- }
+- }
++ char ip6_str[INET6_ADDRSTRLEN + 1];
++ struct in6_addr lla;
++ in6_generate_lla(op->lrp_networks.ea, &lla);
++ memset(ip6_str, 0, sizeof(ip6_str));
++ ipv6_string_mapped(ip6_str, &lla);
++ ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
++ "ip6.dst = ip6.src; ip6.src = %s; "
++ "outport = inport; flags.loopback = 1; "
++ "output;",
++ op->lrp_networks.ea_s, ip6_str);
++ ovn_lflow_add_with_hint(lflows, op->od,
++ S_ROUTER_IN_ND_RA_RESPONSE, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ }
++}
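
A minimal standalone sketch (not part of the patch) of the RA MTU handling above: the "mtu" option is only advertised when it parses to at least 1280, the minimum IPv6 MTU; ra_mtu_from_option() is a hypothetical helper name used here for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Hypothetical helper mirroring the option parsing above: values below
 * 1280 (or a missing option) mean "do not advertise an MTU" (0). */
static uint32_t
ra_mtu_from_option(const char *mtu_s)
{
    return (mtu_s && atoi(mtu_s) >= 1280) ? (uint32_t) atoi(mtu_s) : 0;
}

int
main(void)
{
    printf("%u\n", ra_mtu_from_option("1500")); /* 1500: advertised */
    printf("%u\n", ra_mtu_from_option("1200")); /* 0: below the minimum */
    printf("%u\n", ra_mtu_from_option(NULL));   /* 0: option not set */
    return 0;
}
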
+
+- /* For distributed router NAT, determine whether this NAT rule
+- * satisfies the conditions for distributed NAT processing. */
+- bool distributed = false;
+- struct eth_addr mac;
+- if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
+- nat->logical_port && nat->external_mac) {
+- if (eth_addr_from_string(nat->external_mac, &mac)) {
+- distributed = true;
+- } else {
+- static struct vlog_rate_limit rl =
+- VLOG_RATE_LIMIT_INIT(5, 1);
+- VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
+- ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
+- continue;
+- }
+- }
++/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
++ * responder, by default goto next. (priority 0). */
++static void
++build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
++{
++ if (od->nbr) {
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
++ }
++}
+
+- /* Ingress UNSNAT table: It is for already established connections'
+- * reverse traffic. i.e., SNAT has already been done in egress
+- * pipeline and now the packet has entered the ingress pipeline as
+- * part of a reply. We undo the SNAT here.
+- *
+- * Undoing SNAT has to happen before DNAT processing. This is
+- * because when the packet was DNATed in ingress pipeline, it did
+- * not know about the possibility of eventual additional SNAT in
+- * egress pipeline. */
+- if (!strcmp(nat->type, "snat")
+- || !strcmp(nat->type, "dnat_and_snat")) {
+- if (!od->l3dgw_port) {
+- /* Gateway router. */
+- ds_clear(&match);
+- ds_clear(&actions);
+- ds_put_format(&match, "ip && ip%s.dst == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip);
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_cstr(&actions, "ct_snat;");
+- }
++/* Logical router ingress table IP_ROUTING : IP Routing.
++ *
++ * A packet that arrives at this table is an IP packet that should be
++ * routed to the address in 'ip[46].dst'.
++ *
++ * For regular routes without ECMP, table IP_ROUTING sets outport to the
++ * correct output port, eth.src to the output port's MAC address, and
++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
++ * advances to the next table.
++ *
++ * For ECMP routes, i.e. multiple routes with same policy and prefix, table
++ * IP_ROUTING remembers ECMP group id and selects a member id, and advances
++ * to table IP_ROUTING_ECMP, which sets outport, eth.src and
++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
++ */
++static void
++build_ip_routing_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows)
++{
++ if (op->nbrp) {
+
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+- 90, ds_cstr(&match),
+- ds_cstr(&actions),
+- &nat->header_);
+- } else {
+- /* Distributed router. */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
++ op->lrp_networks.ipv4_addrs[i].network_s,
++ op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
++ &op->nbrp->header_);
++ }
+
+- /* Traffic received on l3dgw_port is subject to NAT. */
+- ds_clear(&match);
+- ds_clear(&actions);
+- ds_put_format(&match, "ip && ip%s.dst == %s"
+- " && inport == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip,
+- od->l3dgw_port->json_key);
+- if (!distributed && od->l3redirect_port) {
+- /* Flows for NAT rules that are centralized are only
+- * programmed on the gateway chassis. */
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
+- }
++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
++ op->lrp_networks.ipv6_addrs[i].network_s,
++ op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
++ &op->nbrp->header_);
++ }
++ }
++}
+
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_cstr(&actions, "ct_snat;");
+- }
++static void
++build_static_route_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct hmap *ports, struct hmap *bfd_connections)
++{
++ if (od->nbr) {
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
++ REG_ECMP_GROUP_ID" == 0", "next;");
+
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+- 100,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
++ struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
++ struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
++ struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
++ struct ecmp_groups_node *group;
++ for (int i = 0; i < od->nbr->n_static_routes; i++) {
++ struct parsed_route *route =
++ parsed_routes_add(&parsed_routes, od->nbr->static_routes[i],
++ bfd_connections);
++ if (!route) {
++ continue;
++ }
++ group = ecmp_groups_find(&ecmp_groups, route);
++ if (group) {
++ ecmp_groups_add_route(group, route);
++ } else {
++ const struct parsed_route *existed_route =
++ unique_routes_remove(&unique_routes, route);
++ if (existed_route) {
++ group = ecmp_groups_add(&ecmp_groups, existed_route);
++ if (group) {
++ ecmp_groups_add_route(group, route);
++ }
++ } else {
++ unique_routes_add(&unique_routes, route);
+ }
+ }
++ }
++ HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
++ /* add a flow in IP_ROUTING, and one flow for each member in
++ * IP_ROUTING_ECMP. */
++ build_ecmp_route_flow(lflows, od, ports, group);
++ }
++ const struct unique_routes_node *ur;
++ HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
++ build_static_route_flow(lflows, od, ports, ur->route);
++ }
++ ecmp_groups_destroy(&ecmp_groups);
++ unique_routes_destroy(&unique_routes);
++ parsed_routes_destroy(&parsed_routes);
++ }
++}
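
The grouping above can be pictured with a flat-array sketch (not part of the patch; the real code uses hmaps keyed on route policy and prefix): a prefix seen a second time is moved out of the unique set into an ECMP group, and later duplicates join that group.

#include <stdio.h>
#include <string.h>

#define MAX_ROUTES 8

int
main(void)
{
    /* Made-up static route prefixes for illustration. */
    const char *prefixes[] = { "10.0.0.0/24", "0.0.0.0/0",
                               "10.0.0.0/24", "10.0.0.0/24" };
    const char *unique[MAX_ROUTES]; int n_unique = 0;
    const char *ecmp[MAX_ROUTES];   int n_ecmp = 0;

    for (size_t i = 0; i < sizeof prefixes / sizeof prefixes[0]; i++) {
        const char *p = prefixes[i];
        int grouped = 0;
        for (int j = 0; j < n_ecmp; j++) {           /* existing group? */
            if (!strcmp(ecmp[j], p)) { grouped = 1; break; }
        }
        if (!grouped) {
            for (int j = 0; j < n_unique; j++) {     /* duplicate prefix? */
                if (!strcmp(unique[j], p)) {
                    unique[j] = unique[--n_unique];  /* move to ECMP set */
                    ecmp[n_ecmp++] = p;
                    grouped = 1;
                    break;
                }
            }
        }
        if (!grouped) {
            unique[n_unique++] = p;                  /* first sighting */
        }
    }
    printf("unique routes: %d, ecmp prefixes: %d\n", n_unique, n_ecmp);
    return 0;
}
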
+
+- /* Ingress DNAT table: Packets enter the pipeline with destination
+- * IP address that needs to be DNATted from a external IP address
+- * to a logical IP address. */
+- if (!strcmp(nat->type, "dnat")
+- || !strcmp(nat->type, "dnat_and_snat")) {
+- if (!od->l3dgw_port) {
+- /* Gateway router. */
+- /* Packet when it goes from the initiator to destination.
+- * We need to set flags.loopback because the router can
+- * send the packet back through the same interface. */
+- ds_clear(&match);
+- ds_put_format(&match, "ip && ip%s.dst == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip);
+- ds_clear(&actions);
+- if (allowed_ext_ips || exempted_ext_ips) {
+- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
+- is_v6, true, mask);
+- }
++/* IP Multicast lookup. Here we set the output port, adjust TTL and
++ * advance to next table (priority 500).
++ */
++static void
++build_mcast_lookup_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (od->nbr) {
+
+- if (dnat_force_snat_ip) {
+- /* Indicate to the future tables that a DNAT has taken
+- * place and a force SNAT needs to be done in the
+- * Egress SNAT table. */
+- ds_put_format(&actions,
+- "flags.force_snat_for_dnat = 1; ");
+- }
++ /* Drop IPv6 multicast traffic that shouldn't be forwarded,
++ * i.e., router solicitation and router advertisement.
++ */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
++ "nd_rs || nd_ra", "drop;");
++ if (!od->mcast_info.rtr.relay) {
++ return;
++ }
+
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "flags.loopback = 1; "
+- "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_format(&actions, "flags.loopback = 1; "
+- "ct_dnat(%s", nat->logical_ip);
++ struct ovn_igmp_group *igmp_group;
+
+- if (nat->external_port_range[0]) {
+- ds_put_format(&actions, ",%s",
+- nat->external_port_range);
+- }
+- ds_put_format(&actions, ");");
+- }
++ LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
++ ds_clear(match);
++ ds_clear(actions);
++ if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
++ ds_put_format(match, "ip4 && ip4.dst == %s ",
++ igmp_group->mcgroup.name);
++ } else {
++ ds_put_format(match, "ip6 && ip6.dst == %s ",
++ igmp_group->mcgroup.name);
++ }
++ if (od->mcast_info.rtr.flood_static) {
++ ds_put_cstr(actions,
++ "clone { "
++ "outport = \""MC_STATIC"\"; "
++ "ip.ttl--; "
++ "next; "
++ "};");
++ }
++ ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
++ igmp_group->mcgroup.name);
++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
++ ds_cstr(match), ds_cstr(actions));
++ }
+
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+- } else {
+- /* Distributed router. */
++ /* If needed, flood unregistered multicast on statically configured
++ * ports. Otherwise drop any multicast traffic.
++ */
++ if (od->mcast_info.rtr.flood_static) {
++ ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
++ "ip4.mcast || ip6.mcast",
++ "clone { "
++ "outport = \""MC_STATIC"\"; "
++ "ip.ttl--; "
++ "next; "
++ "};");
++ } else {
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
++ "ip4.mcast || ip6.mcast", "drop;");
++ }
++ }
++}
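
For one IGMP group the loop above produces an action of the following shape; the sketch below (not part of the patch) prints it for both values of flood_static, using "_MC_static" as an assumed stand-in for the MC_STATIC group name and a made-up group name.

#include <stdio.h>
#include <stdbool.h>

/* Stand-in for the MC_STATIC multicast group name macro (assumption). */
#define MC_STATIC "_MC_static"

/* Prints the routing action for one IGMP group: optionally clone the
 * packet to the statically configured ports first, then output to the
 * group itself, decrementing the TTL on each path. */
static void
print_mcast_action(const char *group, bool flood_static)
{
    if (flood_static) {
        printf("clone { outport = \"" MC_STATIC "\"; ip.ttl--; next; }; ");
    }
    printf("outport = \"%s\"; ip.ttl--; next;\n", group);
}

int
main(void)
{
    print_mcast_action("239.0.1.68_28131", false); /* made-up group name */
    print_mcast_action("239.0.1.68_28131", true);
    return 0;
}
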
+
+- /* Traffic received on l3dgw_port is subject to NAT. */
+- ds_clear(&match);
+- ds_put_format(&match, "ip && ip%s.dst == %s"
+- " && inport == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip,
+- od->l3dgw_port->json_key);
+- if (!distributed && od->l3redirect_port) {
+- /* Flows for NAT rules that are centralized are only
+- * programmed on the gateway chassis. */
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
+- }
+- ds_clear(&actions);
+- if (allowed_ext_ips || exempted_ext_ips) {
+- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
+- is_v6, true, mask);
+- }
++/* Logical router ingress table POLICY: Policy.
++ *
++ * A packet that arrives at this table is an IP packet that should be
++ * permitted/denied/rerouted to the address in the rule's nexthop.
++ * This table sets outport to the correct out_port,
++ * eth.src to the output port's MAC address,
++ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
++ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
++ * advances to the next table for ARP/ND resolution. */
++static void
++build_ingress_policy_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct hmap *ports)
++{
++ if (od->nbr) {
++ /* This is a catch-all rule. It has the lowest priority (0),
++ * matches everything ("1"), and passes the packet through (next). */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
++ REG_ECMP_GROUP_ID" = 0; next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
++ REG_ECMP_GROUP_ID" == 0", "next;");
+
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.dst=%s; next;",
+- is_v6 ? "6" : "4", nat->logical_ip);
+- } else {
+- ds_put_format(&actions, "ct_dnat(%s", nat->logical_ip);
+- if (nat->external_port_range[0]) {
+- ds_put_format(&actions, ",%s",
+- nat->external_port_range);
+- }
+- ds_put_format(&actions, ");");
+- }
++ /* Convert routing policies to flows. */
++ uint16_t ecmp_group_id = 1;
++ for (int i = 0; i < od->nbr->n_policies; i++) {
++ const struct nbrec_logical_router_policy *rule
++ = od->nbr->policies[i];
++ bool is_ecmp_reroute =
++ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
+
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+- }
++ if (is_ecmp_reroute) {
++ build_ecmp_routing_policy_flows(lflows, od, ports, rule,
++ ecmp_group_id);
++ ecmp_group_id++;
++ } else {
++ build_routing_policy_flow(lflows, od, ports, rule,
++ &rule->header_);
+ }
++ }
++ }
++}
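
A small standalone sketch (not part of the patch) of the ECMP group id allocation above: ids are handed out sequentially, starting at 1, only to "reroute" policies with more than one nexthop; everything else keeps the default group id 0.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, simplified stand-in for nbrec_logical_router_policy. */
struct policy {
    const char *action;
    int n_nexthops;
};

int
main(void)
{
    struct policy policies[] = {
        { "allow",   0 },
        { "reroute", 1 },   /* single nexthop: no ECMP group needed */
        { "reroute", 3 },   /* multiple nexthops: gets group id 1 */
    };
    uint16_t ecmp_group_id = 1;

    for (size_t i = 0; i < sizeof policies / sizeof policies[0]; i++) {
        bool is_ecmp_reroute = !strcmp(policies[i].action, "reroute")
                               && policies[i].n_nexthops > 1;
        printf("policy %zu -> ecmp group %d\n", i,
               is_ecmp_reroute ? ecmp_group_id++ : 0);
    }
    return 0;
}
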
+
+- /* ARP resolve for NAT IPs. */
+- if (od->l3dgw_port) {
+- if (!strcmp(nat->type, "snat")) {
+- ds_clear(&match);
+- ds_put_format(
+- &match, "inport == %s && %s == %s",
+- od->l3dgw_port->json_key,
+- is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
+- 120, ds_cstr(&match), "next;",
+- &nat->header_);
+- }
++/* Logical router ingress table ARP_RESOLVE: ARP Resolution. */
++static void
++build_arp_resolve_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows)
++{
++ if (od->nbr) {
++ /* Multicast packets already have the outport set so just advance to
++ * next table (priority 500). */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
++ "ip4.mcast || ip6.mcast", "next;");
+
+- if (!sset_contains(&nat_entries, nat->external_ip)) {
+- ds_clear(&match);
+- ds_put_format(
+- &match, "outport == %s && %s == %s",
+- od->l3dgw_port->json_key,
+- is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
+- nat->external_ip);
+- ds_clear(&actions);
+- ds_put_format(
+- &actions, "eth.dst = %s; next;",
+- distributed ? nat->external_mac :
+- od->l3dgw_port->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, od,
+- S_ROUTER_IN_ARP_RESOLVE,
+- 100, ds_cstr(&match),
+- ds_cstr(&actions),
+- &nat->header_);
+- sset_add(&nat_entries, nat->external_ip);
+- }
+- } else {
+- /* Add the NAT external_ip to the nat_entries even for
+- * gateway routers. This is required for adding load balancer
+- * flows.*/
+- sset_add(&nat_entries, nat->external_ip);
+- }
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
++ "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+
+- /* Egress UNDNAT table: It is for already established connections'
+- * reverse traffic. i.e., DNAT has already been done in ingress
+- * pipeline and now the packet has entered the egress pipeline as
+- * part of a reply. We undo the DNAT here.
+- *
+- * Note that this only applies for NAT on a distributed router.
+- * Undo DNAT on a gateway router is done in the ingress DNAT
+- * pipeline stage. */
+- if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
+- || !strcmp(nat->type, "dnat_and_snat"))) {
+- ds_clear(&match);
+- ds_put_format(&match, "ip && ip%s.src == %s"
+- " && outport == %s",
+- is_v6 ? "6" : "4",
+- nat->logical_ip,
+- od->l3dgw_port->json_key);
+- if (!distributed && od->l3redirect_port) {
+- /* Flows for NAT rules that are centralized are only
+- * programmed on the gateway chassis. */
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
+- }
+- ds_clear(&actions);
+- if (distributed) {
+- ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
+- ETH_ADDR_ARGS(mac));
+- }
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
++ "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
++ }
++}
+
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.src=%s; next;",
+- is_v6 ? "6" : "4", nat->external_ip);
+- } else {
+- ds_put_format(&actions, "ct_dnat;");
+- }
++/* Logical router ingress table ARP_RESOLVE: ARP Resolution.
++ *
++ * Any unicast packet that reaches this table is an IP packet whose
++ * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
++ * (ip4.dst/ip6.dst is the final destination).
++ * This table resolves the IP address in
++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
++ * an Ethernet address in eth.dst.
++ */
++static void
++build_arp_resolve_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
++ struct hmap *ports,
++ struct ds *match, struct ds *actions)
++{
++ if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
++ return;
++ }
+
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+- }
++ if (op->nbrp) {
++ /* This is a logical router port. If next-hop IP address in
++ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
++ * router port, then the packet is intended to eventually be sent
++ * to this logical port. Set the destination mac address using
++ * this port's mac address.
++ *
++ * The packet is still in peer's logical pipeline. So the match
++ * should be on peer's outport. */
++ if (op->peer && op->nbrp->peer) {
++ if (op->lrp_networks.n_ipv4_addrs) {
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV4 "== ",
++ op->peer->json_key);
++ op_put_v4_networks(match, op, false);
+
+- /* Egress SNAT table: Packets enter the egress pipeline with
+- * source ip address that needs to be SNATted to a external ip
+- * address. */
+- if (!strcmp(nat->type, "snat")
+- || !strcmp(nat->type, "dnat_and_snat")) {
+- if (!od->l3dgw_port) {
+- /* Gateway router. */
+- ds_clear(&match);
+- ds_put_format(&match, "ip && ip%s.src == %s",
+- is_v6 ? "6" : "4",
+- nat->logical_ip);
+- ds_clear(&actions);
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;",
++ op->lrp_networks.ea_s);
++ ovn_lflow_add_with_hint(lflows, op->peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ }
+
+- if (allowed_ext_ips || exempted_ext_ips) {
+- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
+- is_v6, false, mask);
+- }
++ if (op->lrp_networks.n_ipv6_addrs) {
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV6 " == ",
++ op->peer->json_key);
++ op_put_v6_networks(match, op);
+
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.src=%s; next;",
+- is_v6 ? "6" : "4", nat->external_ip);
+- } else {
+- ds_put_format(&actions, "ct_snat(%s",
+- nat->external_ip);
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;",
++ op->lrp_networks.ea_s);
++ ovn_lflow_add_with_hint(lflows, op->peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ }
++ }
+
+- if (nat->external_port_range[0]) {
+- ds_put_format(&actions, ",%s",
+- nat->external_port_range);
+- }
+- ds_put_format(&actions, ");");
+- }
++ if (!op->derived && op->od->l3redirect_port) {
++ const char *redirect_type = smap_get(&op->nbrp->options,
++ "redirect-type");
++ if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
++ /* The packet is on a non-gateway chassis and has an
++ * unresolved ARP for a network behind the gateway-chassis
++ * attached router port. Since the redirect type is
++ * "bridged", instead of calling "get_arp" on this node,
++ * we redirect the packet to the gateway chassis by setting
++ * the destination mac to the router port mac. */
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ "!is_chassis_resident(%s)", op->json_key,
++ op->od->l3redirect_port->json_key);
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;",
++ op->lrp_networks.ea_s);
+
+- /* The priority here is calculated such that the
+- * nat->logical_ip with the longest mask gets a higher
+- * priority. */
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+- cidr_bits + 1,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+- } else {
+- uint16_t priority = cidr_bits + 1;
++ ovn_lflow_add_with_hint(lflows, op->od,
++ S_ROUTER_IN_ARP_RESOLVE, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
++ }
++ }
+
+- /* Distributed router. */
+- ds_clear(&match);
+- ds_put_format(&match, "ip && ip%s.src == %s"
+- " && outport == %s",
+- is_v6 ? "6" : "4",
+- nat->logical_ip,
+- od->l3dgw_port->json_key);
+- if (!distributed && od->l3redirect_port) {
+- /* Flows for NAT rules that are centralized are only
+- * programmed on the gateway chassis. */
+- priority += 128;
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
+- }
+- ds_clear(&actions);
++ /* Drop IP traffic destined to router owned IPs. Part of it is dropped
++ * in stage "lr_in_ip_input" but traffic that could have been unSNATed
++ * but didn't match any existing session might still end up here.
++ *
++ * Priority 1.
++ */
++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
++ lflows);
++ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
++ && strcmp(op->nbsp->type, "virtual")) {
++ /* This is a logical switch port that backs a VM or a container.
++ * Extract its addresses. For each of the addresses, go through all
++ * the router ports attached to the switch (to which this port
++ * connects) and if the address in question is reachable from the
++ * router port, add an ARP/ND entry in that router's pipeline. */
+
+- if (allowed_ext_ips || exempted_ext_ips) {
+- lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
+- is_v6, false, mask);
++ for (size_t i = 0; i < op->n_lsp_addrs; i++) {
++ const char *ea_s = op->lsp_addrs[i].ea_s;
++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
++ const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
++ for (size_t k = 0; k < op->od->n_router_ports; k++) {
++ /* Get the Logical_Router_Port that the
++ * Logical_Switch_Port is connected to, as
++ * 'peer'. */
++ const char *peer_name = smap_get(
++ &op->od->router_ports[k]->nbsp->options,
++ "router-port");
++ if (!peer_name) {
++ continue;
+ }
+
+- if (distributed) {
+- ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
+- ETH_ADDR_ARGS(mac));
++ struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ if (!peer || !peer->nbrp) {
++ continue;
+ }
+
+- if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+- ds_put_format(&actions, "ip%s.src=%s; next;",
+- is_v6 ? "6" : "4", nat->external_ip);
+- } else {
+- ds_put_format(&actions, "ct_snat(%s",
+- nat->external_ip);
+- if (nat->external_port_range[0]) {
+- ds_put_format(&actions, ",%s",
+- nat->external_port_range);
+- }
+- ds_put_format(&actions, ");");
++ if (!find_lrp_member_ip(peer, ip_s)) {
++ continue;
+ }
+
+- /* The priority here is calculated such that the
+- * nat->logical_ip with the longest mask gets a higher
+- * priority. */
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+- priority, ds_cstr(&match),
+- ds_cstr(&actions),
+- &nat->header_);
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV4 " == %s",
++ peer->json_key, ip_s);
++
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
++ ovn_lflow_add_with_hint(lflows, peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match),
++ ds_cstr(actions),
++ &op->nbsp->header_);
+ }
+ }
+
+- /* Logical router ingress table 0:
+- * For NAT on a distributed router, add rules allowing
+- * ingress traffic with eth.dst matching nat->external_mac
+- * on the l3dgw_port instance where nat->logical_port is
+- * resident. */
+- if (distributed) {
+- /* Store the ethernet address of the port receiving the packet.
+- * This will save us from having to match on inport further
+- * down in the pipeline.
+- */
+- ds_clear(&actions);
+- ds_put_format(&actions, REG_INPORT_ETH_ADDR " = %s; next;",
+- od->l3dgw_port->lrp_networks.ea_s);
++ for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
++ const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
++ for (size_t k = 0; k < op->od->n_router_ports; k++) {
++ /* Get the Logical_Router_Port that the
++ * Logical_Switch_Port is connected to, as
++ * 'peer'. */
++ const char *peer_name = smap_get(
++ &op->od->router_ports[k]->nbsp->options,
++ "router-port");
++ if (!peer_name) {
++ continue;
++ }
+
+- ds_clear(&match);
+- ds_put_format(&match,
+- "eth.dst == "ETH_ADDR_FMT" && inport == %s"
+- " && is_chassis_resident(\"%s\")",
+- ETH_ADDR_ARGS(mac),
+- od->l3dgw_port->json_key,
+- nat->logical_port);
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+- }
++ struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ if (!peer || !peer->nbrp) {
++ continue;
++ }
+
+- /* Ingress Gateway Redirect Table: For NAT on a distributed
+- * router, add flows that are specific to a NAT rule. These
+- * flows indicate the presence of an applicable NAT rule that
+- * can be applied in a distributed manner.
+- * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to
+- * NAT external IP and NAT external mac so the ARP request
+- * generated in the following stage is sent out with proper IP/MAC
+- * src addresses.
+- */
+- if (distributed) {
+- ds_clear(&match);
+- ds_clear(&actions);
+- ds_put_format(&match,
+- "ip%s.src == %s && outport == %s && "
+- "is_chassis_resident(\"%s\")",
+- is_v6 ? "6" : "4", nat->logical_ip,
+- od->l3dgw_port->json_key, nat->logical_port);
+- ds_put_format(&actions, "eth.src = %s; %s = %s; next;",
+- nat->external_mac,
+- is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
+- nat->external_ip);
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
+- 100, ds_cstr(&match),
+- ds_cstr(&actions), &nat->header_);
+- }
++ if (!find_lrp_member_ip(peer, ip_s)) {
++ continue;
++ }
+
+- /* Egress Loopback table: For NAT on a distributed router.
+- * If packets in the egress pipeline on the distributed
+- * gateway port have ip.dst matching a NAT external IP, then
+- * loop a clone of the packet back to the beginning of the
+- * ingress pipeline with inport = outport. */
+- if (od->l3dgw_port) {
+- /* Distributed router. */
+- ds_clear(&match);
+- ds_put_format(&match, "ip%s.dst == %s && outport == %s",
+- is_v6 ? "6" : "4",
+- nat->external_ip,
+- od->l3dgw_port->json_key);
+- if (!distributed) {
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
+- } else {
+- ds_put_format(&match, " && is_chassis_resident(\"%s\")",
+- nat->logical_port);
+- }
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV6 " == %s",
++ peer->json_key, ip_s);
+
+- ds_clear(&actions);
+- ds_put_format(&actions,
+- "clone { ct_clear; "
+- "inport = outport; outport = \"\"; "
+- "flags = 0; flags.loopback = 1; ");
+- for (int j = 0; j < MFF_N_LOG_REGS; j++) {
+- ds_put_format(&actions, "reg%d = 0; ", j);
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
++ ovn_lflow_add_with_hint(lflows, peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match),
++ ds_cstr(actions),
++ &op->nbsp->header_);
+ }
+- ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
+- "next(pipeline=ingress, table=%d); };",
+- ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
+- ds_cstr(&match), ds_cstr(&actions),
+- &nat->header_);
+ }
+ }
++ } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
++ && !strcmp(op->nbsp->type, "virtual")) {
++ /* This is a virtual port. Add ARP replies for the virtual IP with
++ * the MAC of the currently active virtual parent.
++ * If the logical port doesn't have a virtual parent set in the
++ * Port_Binding table, then add a flow that sets eth.dst to
++ * 00:00:00:00:00:00 and advances to the next table so that ARP is
++ * resolved by the router pipeline using the arp{} action.
++ * The MAC_Binding entry for the virtual IP might be invalid. */
++ ovs_be32 ip;
+
+- /* Handle force SNAT options set in the gateway router. */
+- if (!od->l3dgw_port) {
+- if (dnat_force_snat_ip) {
+- if (od->dnat_force_snat_addrs.n_ipv4_addrs) {
+- build_lrouter_force_snat_flows(lflows, od, "4",
+- od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s,
+- "dnat");
+- }
+- if (od->dnat_force_snat_addrs.n_ipv6_addrs) {
+- build_lrouter_force_snat_flows(lflows, od, "6",
+- od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s,
+- "dnat");
+- }
+- }
+- if (lb_force_snat_ip) {
+- if (od->lb_force_snat_addrs.n_ipv4_addrs) {
+- build_lrouter_force_snat_flows(lflows, od, "4",
+- od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb");
+- }
+- if (od->lb_force_snat_addrs.n_ipv6_addrs) {
+- build_lrouter_force_snat_flows(lflows, od, "6",
+- od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
+- }
+- }
+-
+- /* For gateway router, re-circulate every packet through
+- * the DNAT zone. This helps with the following.
+- *
+- * Any packet that needs to be unDNATed in the reverse
+- * direction gets unDNATed. Ideally this could be done in
+- * the egress pipeline. But since the gateway router
+- * does not have any feature that depends on the source
+- * ip address being external IP address for IP routing,
+- * we can do it here, saving a future re-circulation. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+- "ip", "flags.loopback = 1; ct_dnat;");
++ const char *vip = smap_get(&op->nbsp->options,
++ "virtual-ip");
++ const char *virtual_parents = smap_get(&op->nbsp->options,
++ "virtual-parents");
++ if (!vip || !virtual_parents ||
++ !ip_parse(vip, &ip) || !op->sb) {
++ return;
+ }
+
+- /* Load balancing and packet defrag are only valid on
+- * Gateway routers or router with gateway port. */
+- if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+- sset_destroy(&nat_entries);
+- continue;
+- }
++ if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
++ !op->sb->chassis) {
++ /* The virtual port is not claimed yet. */
++ for (size_t i = 0; i < op->od->n_router_ports; i++) {
++ const char *peer_name = smap_get(
++ &op->od->router_ports[i]->nbsp->options,
++ "router-port");
++ if (!peer_name) {
++ continue;
++ }
+
+- /* A set to hold all ips that need defragmentation and tracking. */
+- struct sset all_ips = SSET_INITIALIZER(&all_ips);
++ struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ if (!peer || !peer->nbrp) {
++ continue;
++ }
+
+- for (int i = 0; i < od->nbr->n_load_balancer; i++) {
+- struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i];
+- struct ovn_northd_lb *lb =
+- ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
+- ovs_assert(lb);
++ if (find_lrp_member_ip(peer, vip)) {
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV4 " == %s",
++ peer->json_key, vip);
+
+- for (size_t j = 0; j < lb->n_vips; j++) {
+- struct ovn_lb_vip *lb_vip = &lb->vips[j];
+- struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j];
+- ds_clear(&actions);
+- build_lb_vip_ct_lb_actions(lb_vip, lb_vip_nb, &actions,
+- lb->selection_fields);
++ const char *arp_actions =
++ "eth.dst = 00:00:00:00:00:00; next;";
++ ovn_lflow_add_with_hint(lflows, peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match),
++ arp_actions,
++ &op->nbsp->header_);
++ break;
++ }
++ }
++ } else {
++ struct ovn_port *vp =
++ ovn_port_find(ports, op->sb->virtual_parent);
++ if (!vp || !vp->nbsp) {
++ return;
++ }
+
+- if (!sset_contains(&all_ips, lb_vip->vip_str)) {
+- sset_add(&all_ips, lb_vip->vip_str);
+- /* If there are any load balancing rules, we should send
+- * the packet to conntrack for defragmentation and
+- * tracking. This helps with two things.
+- *
+- * 1. With tracking, we can send only new connections to
+- * pick a DNAT ip address from a group.
+- * 2. If there are L4 ports in load balancing rules, we
+- * need the defragmentation to match on L4 ports. */
+- ds_clear(&match);
+- if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+- ds_put_format(&match, "ip && ip4.dst == %s",
+- lb_vip->vip_str);
+- } else {
+- ds_put_format(&match, "ip && ip6.dst == %s",
+- lb_vip->vip_str);
++ for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
++ bool found_vip_network = false;
++ const char *ea_s = vp->lsp_addrs[i].ea_s;
++ for (size_t j = 0; j < vp->od->n_router_ports; j++) {
++ /* Get the Logical_Router_Port that the
++ * Logical_Switch_Port is connected to, as
++ * 'peer'. */
++ const char *peer_name = smap_get(
++ &vp->od->router_ports[j]->nbsp->options,
++ "router-port");
++ if (!peer_name) {
++ continue;
+ }
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG,
+- 100, ds_cstr(&match), "ct_next;",
+- &nb_lb->header_);
+- }
+
+- /* Higher priority rules are added for load-balancing in DNAT
+- * table. For every match (on a VIP[:port]), we add two flows
+- * via add_router_lb_flow(). One flow is for specific matching
+- * on ct.new with an action of "ct_lb($targets);". The other
+- * flow is for ct.est with an action of "ct_dnat;". */
+- ds_clear(&match);
+- if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+- ds_put_format(&match, "ip && ip4.dst == %s",
+- lb_vip->vip_str);
+- } else {
+- ds_put_format(&match, "ip && ip6.dst == %s",
+- lb_vip->vip_str);
+- }
++ struct ovn_port *peer =
++ ovn_port_find(ports, peer_name);
++ if (!peer || !peer->nbrp) {
++ continue;
++ }
+
+- int prio = 110;
+- bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp");
+- bool is_sctp = nullable_string_is_equal(nb_lb->protocol,
+- "sctp");
+- const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp";
++ if (!find_lrp_member_ip(peer, vip)) {
++ continue;
++ }
+
+- if (lb_vip->vip_port) {
+- ds_put_format(&match, " && %s && %s.dst == %d", proto,
+- proto, lb_vip->vip_port);
+- prio = 120;
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV4 " == %s",
++ peer->json_key, vip);
++
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;", ea_s);
++ ovn_lflow_add_with_hint(lflows, peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match),
++ ds_cstr(actions),
++ &op->nbsp->header_);
++ found_vip_network = true;
++ break;
+ }
+
+- if (od->l3redirect_port) {
+- ds_put_format(&match, " && is_chassis_resident(%s)",
+- od->l3redirect_port->json_key);
++ if (found_vip_network) {
++ break;
+ }
+- add_router_lb_flow(lflows, od, &match, &actions, prio,
+- lb_force_snat_ip, lb_vip, proto,
+- nb_lb, meter_groups, &nat_entries);
+ }
+ }
+- sset_destroy(&all_ips);
+- sset_destroy(&nat_entries);
+- }
+-
+- ds_destroy(&match);
+- ds_destroy(&actions);
+-}
++ } else if (lsp_is_router(op->nbsp)) {
++ /* This is a logical switch port that connects to a router. */
+
+-/* Logical router ingress Table 0: L2 Admission Control
+- * Generic admission control flows (without inport check).
+- */
+-static void
+-build_adm_ctrl_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows)
+-{
+- if (od->nbr) {
+- /* Logical VLANs not supported.
+- * Broadcast/multicast source address is invalid. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
+- "vlan.present || eth.src[40]", "drop;");
+- }
+-}
++ /* The peer of this switch port is the router port for which
++ * we need to add logical flows such that it can resolve
++ * ARP entries for all the other router ports connected to
++ * the switch in question. */
+
+-/* Logical router ingress Table 0: L2 Admission Control
+- * This table drops packets that the router shouldn’t see at all based
+- * on their Ethernet headers.
+- */
+-static void
+-build_adm_ctrl_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (op->nbrp) {
+- if (!lrport_is_enabled(op->nbrp)) {
+- /* Drop packets from disabled logical ports (since logical flow
+- * tables are default-drop). */
++ const char *peer_name = smap_get(&op->nbsp->options,
++ "router-port");
++ if (!peer_name) {
+ return;
+ }
+
+- if (op->derived) {
+- /* No ingress packets should be received on a chassisredirect
+- * port. */
++ struct ovn_port *peer = ovn_port_find(ports, peer_name);
++ if (!peer || !peer->nbrp) {
+ return;
+ }
+
+- /* Store the ethernet address of the port receiving the packet.
+- * This will save us from having to match on inport further down in
+- * the pipeline.
+- */
+- ds_clear(actions);
+- ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
+- op->lrp_networks.ea_s);
+-
+- ds_clear(match);
+- ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+-
+- ds_clear(match);
+- ds_put_format(match, "eth.dst == %s && inport == %s",
+- op->lrp_networks.ea_s, op->json_key);
+- if (op->od->l3dgw_port && op == op->od->l3dgw_port
+- && op->od->l3redirect_port) {
+- /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
+- * should only be received on the gateway chassis. */
+- ds_put_format(match, " && is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
++ if (peer->od->nbr &&
++ smap_get_bool(&peer->od->nbr->options,
++ "dynamic_neigh_routers", false)) {
++ return;
+ }
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- }
+-}
+-
+-
+-/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
+- * lflows for logical routers. */
+-static void
+-build_neigh_learning_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (od->nbr) {
+-
+- /* Learn MAC bindings from ARP/IPv6 ND.
+- *
+- * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
+- * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
+- * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
+- * If "always_learn_from_arp_request" is set to false, it will also
+- * lookup for the (arp.spa) in the mac binding table using the
+- * "lookup_arp_ip" action for ARP request packets, and stores the
+- * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
+- * to "1" directly for ARP response packets.
+- *
+- * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
+- * for the (nd.target, nd.tll) in the mac binding table using the
+- * 'lookup_nd' action and stores the result in
+- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+- * "always_learn_from_arp_request" is set to false,
+- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
+- *
+- * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
+- * for the (ip6.src, nd.sll) in the mac binding table using the
+- * 'lookup_nd' action and stores the result in
+- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+- * "always_learn_from_arp_request" is set to false, it will also lookup
+- * for the (ip6.src) in the mac binding table using the "lookup_nd_ip"
+- * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+- * bit.
+- *
+- * Table LEARN_NEIGHBOR learns the mac-binding using the action
+- * - 'put_arp/put_nd'. Learning mac-binding is skipped if
+- * REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or
+- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
+- *
+- * */
+-
+- /* Flows for LOOKUP_NEIGHBOR. */
+- bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
+- "always_learn_from_arp_request", true);
+- ds_clear(actions);
+- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+- " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
+- learn_from_arp_request ? "" :
+- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+- "arp.op == 2", ds_cstr(actions));
+-
+- ds_clear(actions);
+- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+- " = lookup_nd(inport, nd.target, nd.tll); %snext;",
+- learn_from_arp_request ? "" :
+- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
+- ds_cstr(actions));
+-
+- ds_clear(actions);
+- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+- " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
+- learn_from_arp_request ? "" :
+- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+- " = lookup_nd_ip(inport, ip6.src); ");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
+- ds_cstr(actions));
+-
+- /* For other packet types, we can skip neighbor learning.
+- * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
+- REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
+-
+- /* Flows for LEARN_NEIGHBOR. */
+- /* Skip Neighbor learning if not required. */
+- ds_clear(match);
+- ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s",
+- learn_from_arp_request ? "" :
+- " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
+- ds_cstr(match), "next;");
+-
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+- "arp", "put_arp(inport, arp.spa, arp.sha); next;");
+
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+- "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
+-
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+- "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
+- }
++ for (size_t i = 0; i < op->od->n_router_ports; i++) {
++ const char *router_port_name = smap_get(
++ &op->od->router_ports[i]->nbsp->options,
++ "router-port");
++ struct ovn_port *router_port = ovn_port_find(ports,
++ router_port_name);
++ if (!router_port || !router_port->nbrp) {
++ continue;
++ }
+
+-}
++ /* Skip the router port under consideration. */
++ if (router_port == peer) {
++ continue;
++ }
+
+-/* Logical router ingress Table 1: Neighbor lookup lflows
+- * for logical router ports. */
+-static void
+-build_neigh_learning_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (op->nbrp) {
++ if (router_port->lrp_networks.n_ipv4_addrs) {
++ ds_clear(match);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV4 " == ",
++ peer->json_key);
++ op_put_v4_networks(match, router_port, false);
+
+- bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
+- "always_learn_from_arp_request", true);
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;",
++ router_port->lrp_networks.ea_s);
++ ovn_lflow_add_with_hint(lflows, peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbsp->header_);
++ }
+
+- /* Check if we need to learn mac-binding from ARP requests. */
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- if (!learn_from_arp_request) {
+- /* ARP request to this address should always get learned,
+- * so add a priority-110 flow to set
+- * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
++ if (router_port->lrp_networks.n_ipv6_addrs) {
+ ds_clear(match);
+- ds_put_format(match,
+- "inport == %s && arp.spa == %s/%u && "
+- "arp.tpa == %s && arp.op == 1",
+- op->json_key,
+- op->lrp_networks.ipv4_addrs[i].network_s,
+- op->lrp_networks.ipv4_addrs[i].plen,
+- op->lrp_networks.ipv4_addrs[i].addr_s);
+- if (op->od->l3dgw_port && op == op->od->l3dgw_port
+- && op->od->l3redirect_port) {
+- ds_put_format(match, " && is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
+- const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
+- " = lookup_arp(inport, arp.spa, arp.sha); "
+- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
+- " next;";
+- ovn_lflow_add_with_hint(lflows, op->od,
+- S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
+- ds_cstr(match), actions_s,
+- &op->nbrp->header_);
+- }
+- ds_clear(match);
+- ds_put_format(match,
+- "inport == %s && arp.spa == %s/%u && arp.op == 1",
+- op->json_key,
+- op->lrp_networks.ipv4_addrs[i].network_s,
+- op->lrp_networks.ipv4_addrs[i].plen);
+- if (op->od->l3dgw_port && op == op->od->l3dgw_port
+- && op->od->l3redirect_port) {
+- ds_put_format(match, " && is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
++ ds_put_format(match, "outport == %s && "
++ REG_NEXT_HOP_IPV6 " == ",
++ peer->json_key);
++ op_put_v6_networks(match, router_port);
++
++ ds_clear(actions);
++ ds_put_format(actions, "eth.dst = %s; next;",
++ router_port->lrp_networks.ea_s);
++ ovn_lflow_add_with_hint(lflows, peer->od,
++ S_ROUTER_IN_ARP_RESOLVE, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbsp->header_);
+ }
+- ds_clear(actions);
+- ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+- " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
+- learn_from_arp_request ? "" :
+- REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+- " = lookup_arp_ip(inport, arp.spa); ");
+- ovn_lflow_add_with_hint(lflows, op->od,
+- S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+ }
+ }
++
+ }
+
+-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
+- * Adv (RA) options and response. */
++/* Logical router ingress table CHK_PKT_LEN: Check packet length.
++ *
++ * For any IPv4 packet with outport set to the distributed gateway
++ * router port, check the packet length and store the result in the
++ * 'REGBIT_PKT_LARGER' register bit.
++ *
++ * Logical router ingress table LARGER_PKTS: Handle larger packets.
++ *
++ * For any IPv4 packet with outport set to the distributed gateway
++ * router port and the 'REGBIT_PKT_LARGER' register bit set, generate
++ * an ICMPv4 packet with type 3 (Destination Unreachable) and code 4
++ * (Fragmentation Needed); the IPv6 counterpart is ICMPv6 type 2
++ * (Packet Too Big). */
+ static void
+-build_ND_RA_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
++build_check_pkt_len_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct hmap *ports,
+ struct ds *match, struct ds *actions)
+ {
+- if (!op->nbrp || op->nbrp->peer || !op->peer) {
+- return;
+- }
+-
+- if (!op->lrp_networks.n_ipv6_addrs) {
+- return;
+- }
+-
+- struct smap options;
+- smap_clone(&options, &op->sb->options);
+-
+- /* enable IPv6 prefix delegation */
+- bool prefix_delegation = smap_get_bool(&op->nbrp->options,
+- "prefix_delegation", false);
+- if (!lrport_is_enabled(op->nbrp)) {
+- prefix_delegation = false;
+- }
+- smap_add(&options, "ipv6_prefix_delegation",
+- prefix_delegation ? "true" : "false");
++ if (od->nbr) {
+
+- bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
+- "prefix", false);
+- if (!lrport_is_enabled(op->nbrp)) {
+- ipv6_prefix = false;
+- }
+- smap_add(&options, "ipv6_prefix",
+- ipv6_prefix ? "true" : "false");
+- sbrec_port_binding_set_options(op->sb, &options);
++ /* Packets are allowed by default. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
++ "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
++ "next;");
+
+- smap_destroy(&options);
++ if (od->l3dgw_port && od->l3redirect_port) {
++ int gw_mtu = 0;
++ if (od->l3dgw_port->nbrp) {
++ gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
++ "gateway_mtu", 0);
++ }
++ /* Add the flows only if gateway_mtu is configured. */
++ if (gw_mtu <= 0) {
++ return;
++ }
+
+- const char *address_mode = smap_get(
+- &op->nbrp->ipv6_ra_configs, "address_mode");
++ ds_clear(match);
++ ds_put_format(match, "outport == %s", od->l3dgw_port->json_key);
+
+- if (!address_mode) {
+- return;
+- }
+- if (strcmp(address_mode, "slaac") &&
+- strcmp(address_mode, "dhcpv6_stateful") &&
+- strcmp(address_mode, "dhcpv6_stateless")) {
+- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+- VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
+- address_mode);
+- return;
+- }
++ ds_clear(actions);
++ ds_put_format(actions,
++ REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
++ " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &od->l3dgw_port->nbrp->header_);
+
+- if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
+- false)) {
+- copy_ra_to_sb(op, address_mode);
+- }
++ for (size_t i = 0; i < od->nbr->n_ports; i++) {
++ struct ovn_port *rp = ovn_port_find(ports,
++ od->nbr->ports[i]->name);
++ if (!rp || rp == od->l3dgw_port) {
++ continue;
++ }
+
+- ds_clear(match);
+- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
+- op->json_key);
+- ds_clear(actions);
++ if (rp->lrp_networks.ipv4_addrs) {
++ ds_clear(match);
++ ds_put_format(match, "inport == %s && outport == %s"
++ " && ip4 && "REGBIT_PKT_LARGER,
++ rp->json_key, od->l3dgw_port->json_key);
+
+- const char *mtu_s = smap_get(
+- &op->nbrp->ipv6_ra_configs, "mtu");
++ ds_clear(actions);
++ /* Set icmp4.frag_mtu to gw_mtu */
++ ds_put_format(actions,
++ "icmp4_error {"
++ REGBIT_EGRESS_LOOPBACK" = 1; "
++ "eth.dst = %s; "
++ "ip4.dst = ip4.src; "
++ "ip4.src = %s; "
++ "ip.ttl = 255; "
++ "icmp4.type = 3; /* Destination Unreachable. */ "
++ "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
++ "icmp4.frag_mtu = %d; "
++ "next(pipeline=ingress, table=%d); };",
++ rp->lrp_networks.ea_s,
++ rp->lrp_networks.ipv4_addrs[0].addr_s,
++ gw_mtu,
++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
++ ovn_lflow_add_with_hint(lflows, od,
++ S_ROUTER_IN_LARGER_PKTS, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &rp->nbrp->header_);
++ }
+
+- /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
+- uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
++ if (rp->lrp_networks.ipv6_addrs) {
++ ds_clear(match);
++ ds_put_format(match, "inport == %s && outport == %s"
++ " && ip6 && "REGBIT_PKT_LARGER,
++ rp->json_key, od->l3dgw_port->json_key);
+
+- ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
+- "addr_mode = \"%s\", slla = %s",
+- address_mode, op->lrp_networks.ea_s);
+- if (mtu > 0) {
+- ds_put_format(actions, ", mtu = %u", mtu);
++ ds_clear(actions);
++ /* Set icmp6.frag_mtu to gw_mtu */
++ ds_put_format(actions,
++ "icmp6_error {"
++ REGBIT_EGRESS_LOOPBACK" = 1; "
++ "eth.dst = %s; "
++ "ip6.dst = ip6.src; "
++ "ip6.src = %s; "
++ "ip.ttl = 255; "
++ "icmp6.type = 2; /* Packet Too Big. */ "
++ "icmp6.code = 0; "
++ "icmp6.frag_mtu = %d; "
++ "next(pipeline=ingress, table=%d); };",
++ rp->lrp_networks.ea_s,
++ rp->lrp_networks.ipv6_addrs[0].addr_s,
++ gw_mtu,
++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
++ ovn_lflow_add_with_hint(lflows, od,
++ S_ROUTER_IN_LARGER_PKTS, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &rp->nbrp->header_);
++ }
++ }
++ }
+ }
++}
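A minimal sketch (not part of the patch) of the CHK_PKT_LEN flow this builder emits, assuming a hypothetical distributed gateway port whose json_key is "lr0-public" and whose Logical_Router_Port carries options:gateway_mtu=1500; the port name and MTU are invented, and the stage hint used above is omitted. With VLAN_ETH_HEADER_LEN being 18, the emitted flow is equivalent to:

    /* Sketch only: gateway_mtu=1500 plus the 18-byte VLAN+Ethernet header
     * gives the 1518-byte threshold handed to check_pkt_larger(). */
    ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
                  "outport == \"lr0-public\"",
                  REGBIT_PKT_LARGER" = check_pkt_larger(1518); next;");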
+
+- const char *prf = smap_get_def(
+- &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
+- if (strcmp(prf, "MEDIUM")) {
+- ds_put_format(actions, ", router_preference = \"%s\"", prf);
+- }
++/* Logical router ingress table GW_REDIRECT: Gateway redirect.
++ *
++ * For traffic with outport equal to the l3dgw_port
++ * on a distributed router, this table redirects a subset
++ * of the traffic to the l3redirect_port which represents
++ * the central instance of the l3dgw_port.
++ */
++static void
++build_gateway_redirect_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
++{
++ if (od->nbr) {
++ if (od->l3dgw_port && od->l3redirect_port) {
++ const struct ovsdb_idl_row *stage_hint = NULL;
+
+- bool add_rs_response_flow = false;
++ if (od->l3dgw_port->nbrp) {
++ stage_hint = &od->l3dgw_port->nbrp->header_;
++ }
+
+- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+- continue;
++ /* For traffic with outport == l3dgw_port, if the
++ * packet did not match any higher priority redirect
++ * rule, then the traffic is redirected to the central
++ * instance of the l3dgw_port. */
++ ds_clear(match);
++ ds_put_format(match, "outport == %s",
++ od->l3dgw_port->json_key);
++ ds_clear(actions);
++ ds_put_format(actions, "outport = %s; next;",
++ od->l3redirect_port->json_key);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
++ ds_cstr(match), ds_cstr(actions),
++ stage_hint);
+ }
+
+- ds_put_format(actions, ", prefix = %s/%u",
+- op->lrp_networks.ipv6_addrs[i].network_s,
+- op->lrp_networks.ipv6_addrs[i].plen);
+-
+- add_rs_response_flow = true;
+- }
+-
+- if (add_rs_response_flow) {
+- ds_put_cstr(actions, "); next;");
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
+- 50, ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- ds_clear(actions);
+- ds_clear(match);
+- ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
+- "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+-
+- char ip6_str[INET6_ADDRSTRLEN + 1];
+- struct in6_addr lla;
+- in6_generate_lla(op->lrp_networks.ea, &lla);
+- memset(ip6_str, 0, sizeof(ip6_str));
+- ipv6_string_mapped(ip6_str, &lla);
+- ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
+- "ip6.dst = ip6.src; ip6.src = %s; "
+- "outport = inport; flags.loopback = 1; "
+- "output;",
+- op->lrp_networks.ea_s, ip6_str);
+- ovn_lflow_add_with_hint(lflows, op->od,
+- S_ROUTER_IN_ND_RA_RESPONSE, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
++ /* Packets are allowed by default. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
+ }
+ }
+
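A minimal sketch (not part of the patch) of the redirect flow added above, assuming a hypothetical distributed gateway port "lr0-public" whose chassisredirect counterpart has the json_key "cr-lr0-public"; both names are invented and the stage hint is omitted:

    /* Sketch only: traffic leaving through the distributed gateway port is
     * steered to its chassisredirect port, i.e. to the gateway chassis. */
    ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
                  "outport == \"lr0-public\"",
                  "outport = \"cr-lr0-public\"; next;");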
+-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
+- * responder, by default goto next. (priority 0). */
++/* Logical router ingress table ARP_REQUEST: ARP request.
++ *
++ * In the common case where the Ethernet destination has been resolved,
++ * this table outputs the packet (priority 0). Otherwise, it composes
++ * and sends an ARP request or an IPv6 Neighbor Solicitation (priority 100). */
+ static void
+-build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
++build_arp_request_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
+ {
+ if (od->nbr) {
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
++ for (int i = 0; i < od->nbr->n_static_routes; i++) {
++ const struct nbrec_logical_router_static_route *route;
++
++ route = od->nbr->static_routes[i];
++ struct in6_addr gw_ip6;
++ unsigned int plen;
++ char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
++ if (error || plen != 128) {
++ free(error);
++ continue;
++ }
++
++ ds_clear(match);
++ ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
++ "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
++ route->nexthop);
++ struct in6_addr sn_addr;
++ struct eth_addr eth_dst;
++ in6_addr_solicited_node(&sn_addr, &gw_ip6);
++ ipv6_multicast_to_ethernet(ð_dst, &sn_addr);
++
++ char sn_addr_s[INET6_ADDRSTRLEN + 1];
++ ipv6_string_mapped(sn_addr_s, &sn_addr);
++
++ ds_clear(actions);
++ ds_put_format(actions,
++ "nd_ns { "
++ "eth.dst = "ETH_ADDR_FMT"; "
++ "ip6.dst = %s; "
++ "nd.target = %s; "
++ "output; "
++ "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
++ route->nexthop);
++
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
++ ds_cstr(match), ds_cstr(actions),
++ &route->header_);
++ }
++
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
++ "eth.dst == 00:00:00:00:00:00 && ip4",
++ "arp { "
++ "eth.dst = ff:ff:ff:ff:ff:ff; "
++ "arp.spa = " REG_SRC_IPV4 "; "
++ "arp.tpa = " REG_NEXT_HOP_IPV4 "; "
++ "arp.op = 1; " /* ARP request */
++ "output; "
++ "};");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
++ "eth.dst == 00:00:00:00:00:00 && ip6",
++ "nd_ns { "
++ "nd.target = " REG_NEXT_HOP_IPV6 "; "
++ "output; "
++ "};");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
+ }
+ }
+
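A minimal sketch (not part of the patch) of the priority-200 flow the static-route loop above emits, assuming a hypothetical static route whose nexthop is the host address 2001:db8::1; the address is invented and the stage hint is omitted. ff02::1:ff00:1 is the solicited-node multicast address of 2001:db8::1 and 33:33:ff:00:00:01 its Ethernet mapping:

    /* Sketch only: solicit the /128 static-route nexthop directly via its
     * solicited-node multicast group instead of the generic NS flow. */
    ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
                  "eth.dst == 00:00:00:00:00:00 && ip6 && "
                  REG_NEXT_HOP_IPV6 " == 2001:db8::1",
                  "nd_ns { "
                  "eth.dst = 33:33:ff:00:00:01; "
                  "ip6.dst = ff02::1:ff00:1; "
                  "nd.target = 2001:db8::1; "
                  "output; "
                  "};");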
+-/* Logical router ingress table IP_ROUTING : IP Routing.
+- *
+- * A packet that arrives at this table is an IP packet that should be
+- * routed to the address in 'ip[46].dst'.
+- *
+- * For regular routes without ECMP, table IP_ROUTING sets outport to the
+- * correct output port, eth.src to the output port's MAC address, and
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+- * advances to the next table.
++/* Logical router egress table DELIVERY: Delivery (priority 100-110).
+ *
+- * For ECMP routes, i.e. multiple routes with same policy and prefix, table
+- * IP_ROUTING remembers ECMP group id and selects a member id, and advances
+- * to table IP_ROUTING_ECMP, which sets outport, eth.src and
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
++ * Priority 100 rules deliver packets to enabled logical ports.
++ * Priority 110 rules match multicast packets and update the source
++ * mac before delivering to enabled logical ports. IP multicast traffic
++ * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
+ */
+ static void
+-build_ip_routing_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows)
++build_egress_delivery_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
++ struct ds *match, struct ds *actions)
+ {
+ if (op->nbrp) {
++ if (!lrport_is_enabled(op->nbrp)) {
++ /* Drop packets to disabled logical ports (since logical flow
++ * tables are default-drop). */
++ return;
++ }
+
+- for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+- add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
+- op->lrp_networks.ipv4_addrs[i].network_s,
+- op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
+- &op->nbrp->header_);
++ if (op->derived) {
++ /* No egress packets should be processed in the context of
++ * a chassisredirect port. The chassisredirect port should
++ * be replaced by the l3dgw port in the local output
++ * pipeline stage before egress processing. */
++ return;
+ }
+
+- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
+- op->lrp_networks.ipv6_addrs[i].network_s,
+- op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
+- &op->nbrp->header_);
++ /* If multicast relay is enabled then also adjust source mac for IP
++ * multicast traffic.
++ */
++ if (op->od->mcast_info.rtr.relay) {
++ ds_clear(match);
++ ds_clear(actions);
++ ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s",
++ op->json_key);
++ ds_put_format(actions, "eth.src = %s; output;",
++ op->lrp_networks.ea_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
++ ds_cstr(match), ds_cstr(actions));
+ }
++
++ ds_clear(match);
++ ds_put_format(match, "outport == %s", op->json_key);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
++ ds_cstr(match), "output;");
++ }
++
++}
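A minimal sketch (not part of the patch) of the two delivery flows added above, assuming a hypothetical router port whose json_key is "lr0-if0", whose MAC is 00:00:00:00:ff:01, and whose router has multicast relay enabled; all values are invented:

    /* Sketch only: rewrite the source MAC for relayed IP multicast, and
     * deliver all other traffic addressed to the enabled port unchanged. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
                  "(ip4.mcast || ip6.mcast) && outport == \"lr0-if0\"",
                  "eth.src = 00:00:00:00:ff:01; output;");
    ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
                  "outport == \"lr0-if0\"", "output;");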
++
++static void
++build_misc_local_traffic_drop_flows_for_lrouter(
++ struct ovn_datapath *od, struct hmap *lflows)
++{
++ if (od->nbr) {
++ /* L3 admission control: drop multicast and broadcast source, localhost
++ * source or destination, and zero network source or destination
++ * (priority 100). */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
++ "ip4.src_mcast ||"
++ "ip4.src == 255.255.255.255 || "
++ "ip4.src == 127.0.0.0/8 || "
++ "ip4.dst == 127.0.0.0/8 || "
++ "ip4.src == 0.0.0.0/8 || "
++ "ip4.dst == 0.0.0.0/8",
++ "drop;");
++
++ /* Drop ARP packets (priority 85). ARP request packets for router's own
++ * IPs are handled with priority-90 flows.
++ * Drop IPv6 ND packets (priority 85). ND NA packets for router's own
++ * IPs are handled with priority-90 flows.
++ */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
++ "arp || nd", "drop;");
++
++ /* Allow IPv6 multicast traffic that's supposed to reach the
++ * router pipeline (e.g., router solicitations).
++ */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra",
++ "next;");
++
++ /* Drop other reserved multicast. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83,
++ "ip6.mcast_rsvd", "drop;");
++
++ /* Allow other multicast if relay enabled (priority 82). */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
++ "ip4.mcast || ip6.mcast",
++ od->mcast_info.rtr.relay ? "next;" : "drop;");
++
++ /* Drop Ethernet local broadcast. By definition this traffic should
++ * not be forwarded. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
++ "eth.bcast", "drop;");
++
++ /* TTL discard */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
++ "ip4 && ip.ttl == {0, 1}", "drop;");
++
++ /* Pass other traffic not already handled to the next table for
++ * routing. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
+ }
+ }
+
+ static void
+-build_static_route_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct hmap *ports)
++build_dhcpv6_reply_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
++ struct ds *match)
+ {
+- if (od->nbr) {
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
+- REG_ECMP_GROUP_ID" == 0", "next;");
+-
+- struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
+- struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
+- struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
+- struct ecmp_groups_node *group;
+- for (int i = 0; i < od->nbr->n_static_routes; i++) {
+- struct parsed_route *route =
+- parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
+- if (!route) {
+- continue;
+- }
+- group = ecmp_groups_find(&ecmp_groups, route);
+- if (group) {
+- ecmp_groups_add_route(group, route);
+- } else {
+- const struct parsed_route *existed_route =
+- unique_routes_remove(&unique_routes, route);
+- if (existed_route) {
+- group = ecmp_groups_add(&ecmp_groups, existed_route);
+- if (group) {
+- ecmp_groups_add_route(group, route);
+- }
+- } else {
+- unique_routes_add(&unique_routes, route);
+- }
+- }
+- }
+- HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
+- /* add a flow in IP_ROUTING, and one flow for each member in
+- * IP_ROUTING_ECMP. */
+- build_ecmp_route_flow(lflows, od, ports, group);
+- }
+- const struct unique_routes_node *ur;
+- HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
+- build_static_route_flow(lflows, od, ports, ur->route);
++ if (op->nbrp && (!op->derived)) {
++ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ ds_clear(match);
++ ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&"
++ " udp.dst == 546",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
++ ds_cstr(match),
++ "reg0 = 0; handle_dhcpv6_reply;");
+ }
+- ecmp_groups_destroy(&ecmp_groups);
+- unique_routes_destroy(&unique_routes);
+- parsed_routes_destroy(&parsed_routes);
+ }
++
+ }
+
+-/* IP Multicast lookup. Here we set the output port, adjust TTL and
+- * advance to next table (priority 500).
+- */
+ static void
+-build_mcast_lookup_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
++build_ipv6_input_flows_for_lrouter_port(
++ struct ovn_port *op, struct hmap *lflows,
+ struct ds *match, struct ds *actions)
+ {
+- if (od->nbr) {
++ if (op->nbrp && (!op->derived)) {
++ /* No ingress packets are accepted on a chassisredirect
++ * port, so no need to program flows for that port. */
++ if (op->lrp_networks.n_ipv6_addrs) {
++ /* ICMPv6 echo reply. These flows reply to echo requests
++ * received for the router's IP address. */
++ ds_clear(match);
++ ds_put_cstr(match, "ip6.dst == ");
++ op_put_v6_networks(match, op);
++ ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
+
+- /* Drop IPv6 multicast traffic that shouldn't be forwarded,
+- * i.e., router solicitation and router advertisement.
+- */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
+- "nd_rs || nd_ra", "drop;");
+- if (!od->mcast_info.rtr.relay) {
+- return;
++ const char *lrp_actions =
++ "ip6.dst <-> ip6.src; "
++ "ip.ttl = 255; "
++ "icmp6.type = 129; "
++ "flags.loopback = 1; "
++ "next; ";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
++ ds_cstr(match), lrp_actions,
++ &op->nbrp->header_);
+ }
+
+- struct ovn_igmp_group *igmp_group;
+-
+- LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
++ /* ND reply. These flows reply to ND solicitations for the
++ * router's own IP address. */
++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+ ds_clear(match);
+- ds_clear(actions);
+- if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
+- ds_put_format(match, "ip4 && ip4.dst == %s ",
+- igmp_group->mcgroup.name);
+- } else {
+- ds_put_format(match, "ip6 && ip6.dst == %s ",
+- igmp_group->mcgroup.name);
+- }
+- if (od->mcast_info.rtr.flood_static) {
+- ds_put_cstr(actions,
+- "clone { "
+- "outport = \""MC_STATIC"\"; "
+- "ip.ttl--; "
+- "next; "
+- "};");
++ if (op->od->l3dgw_port && op == op->od->l3dgw_port
++ && op->od->l3redirect_port) {
++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
++ * should only be sent from the gateway chassis, so that
++ * upstream MAC learning points to the gateway chassis.
++ * Also need to avoid generation of multiple ND replies
++ * from different chassis. */
++ ds_put_format(match, "is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
+ }
+- ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
+- igmp_group->mcgroup.name);
+- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+- ds_cstr(match), ds_cstr(actions));
++
++ build_lrouter_nd_flow(op->od, op, "nd_na_router",
++ op->lrp_networks.ipv6_addrs[i].addr_s,
++ op->lrp_networks.ipv6_addrs[i].sn_addr_s,
++ REG_INPORT_ETH_ADDR, match, false, 90,
++ &op->nbrp->header_, lflows);
+ }
+
+- /* If needed, flood unregistered multicast on statically configured
+- * ports. Otherwise drop any multicast traffic.
+- */
+- if (od->mcast_info.rtr.flood_static) {
+- ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+- "ip4.mcast || ip6.mcast",
+- "clone { "
+- "outport = \""MC_STATIC"\"; "
+- "ip.ttl--; "
+- "next; "
+- "};");
+- } else {
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+- "ip4.mcast || ip6.mcast", "drop;");
++ /* UDP/TCP/SCTP port unreachable */
++ if (!smap_get(&op->od->nbr->options, "chassis")
++ && !op->od->l3dgw_port) {
++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ ds_clear(match);
++ ds_put_format(match,
++ "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ const char *action = "tcp_reset {"
++ "eth.dst <-> eth.src; "
++ "ip6.dst <-> ip6.src; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 80, ds_cstr(match), action,
++ &op->nbrp->header_);
++
++ ds_clear(match);
++ ds_put_format(match,
++ "ip6 && ip6.dst == %s && !ip.later_frag && sctp",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ action = "sctp_abort {"
++ "eth.dst <-> eth.src; "
++ "ip6.dst <-> ip6.src; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 80, ds_cstr(match), action,
++ &op->nbrp->header_);
++
++ ds_clear(match);
++ ds_put_format(match,
++ "ip6 && ip6.dst == %s && !ip.later_frag && udp",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ action = "icmp6 {"
++ "eth.dst <-> eth.src; "
++ "ip6.dst <-> ip6.src; "
++ "ip.ttl = 255; "
++ "icmp6.type = 1; "
++ "icmp6.code = 4; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 80, ds_cstr(match), action,
++ &op->nbrp->header_);
++
++ ds_clear(match);
++ ds_put_format(match,
++ "ip6 && ip6.dst == %s && !ip.later_frag",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ action = "icmp6 {"
++ "eth.dst <-> eth.src; "
++ "ip6.dst <-> ip6.src; "
++ "ip.ttl = 255; "
++ "icmp6.type = 1; "
++ "icmp6.code = 3; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 70, ds_cstr(match), action,
++ &op->nbrp->header_);
++ }
+ }
+- }
+-}
+
+-/* Logical router ingress table POLICY: Policy.
+- *
+- * A packet that arrives at this table is an IP packet that should be
+- * permitted/denied/rerouted to the address in the rule's nexthop.
+- * This table sets outport to the correct out_port,
+- * eth.src to the output port's MAC address,
+- * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+- * advances to the next table for ARP/ND resolution. */
+-static void
+-build_ingress_policy_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct hmap *ports)
+-{
+- if (od->nbr) {
+- /* This is a catch-all rule. It has the lowest priority (0)
+- * does a match-all("1") and pass-through (next) */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;");
++ /* ICMPv6 time exceeded */
++ for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
++ /* skip link-local address */
++ if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
++ continue;
++ }
+
+- /* Convert routing policies to flows. */
+- for (int i = 0; i < od->nbr->n_policies; i++) {
+- const struct nbrec_logical_router_policy *rule
+- = od->nbr->policies[i];
+- build_routing_policy_flow(lflows, od, ports, rule, &rule->header_);
++ ds_clear(match);
++ ds_clear(actions);
++
++ ds_put_format(match,
++ "inport == %s && ip6 && "
++ "ip6.src == %s/%d && "
++ "ip.ttl == {0, 1} && !ip.later_frag",
++ op->json_key,
++ op->lrp_networks.ipv6_addrs[i].network_s,
++ op->lrp_networks.ipv6_addrs[i].plen);
++ ds_put_format(actions,
++ "icmp6 {"
++ "eth.dst <-> eth.src; "
++ "ip6.dst = ip6.src; "
++ "ip6.src = %s; "
++ "ip.ttl = 255; "
++ "icmp6.type = 3; /* Time exceeded */ "
++ "icmp6.code = 0; /* TTL exceeded in transit */ "
++ "next; };",
++ op->lrp_networks.ipv6_addrs[i].addr_s);
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
+ }
+ }
++
+ }
+
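A minimal sketch (not part of the patch) of one of the port-unreachable flows emitted above, assuming a hypothetical router port address 2001:db8::1 on a router that has neither a "chassis" option nor an l3dgw_port; the address is invented and the stage hint is omitted:

    /* Sketch only: TCP traffic addressed to the router's own IPv6 address
     * is answered with a RST instead of being routed or silently dropped. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
                  "ip6 && ip6.dst == 2001:db8::1 && !ip.later_frag && tcp",
                  "tcp_reset {"
                  "eth.dst <-> eth.src; "
                  "ip6.dst <-> ip6.src; "
                  "next; };");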
+-/* Local router ingress table ARP_RESOLVE: ARP Resolution. */
+ static void
+-build_arp_resolve_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows)
++build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od,
++ struct hmap *lflows)
+ {
+ if (od->nbr) {
+- /* Multicast packets already have the outport set so just advance to
+- * next table (priority 500). */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
+- "ip4.mcast || ip6.mcast", "next;");
+
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
+- "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
++ /* Priority-90-92 flows handle ARP requests and ND packets. Most are
++ * per logical port but DNAT addresses can be handled per datapath
++ * for non-gateway router ports.
++ *
++ * Priority 91 and 92 flows are added for each gateway router
++ * port to handle the special cases. In case we get the packet
++ * on a regular port, just reply with the port's ETH address.
++ */
++ for (int i = 0; i < od->nbr->n_nat; i++) {
++ struct ovn_nat *nat_entry = &od->nat_entries[i];
+
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
+- "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
++ /* Skip entries we failed to parse. */
++ if (!nat_entry_is_valid(nat_entry)) {
++ continue;
++ }
++
++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately
++ * below.
++ */
++ if (!strcmp(nat_entry->nb->type, "snat")) {
++ continue;
++ }
++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
++ }
++
++ /* Now handle SNAT entries too, one per unique SNAT IP. */
++ struct shash_node *snat_snode;
++ SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
++ struct ovn_snat_ip *snat_ip = snat_snode->data;
++
++ if (ovs_list_is_empty(&snat_ip->snat_entries)) {
++ continue;
++ }
++
++ struct ovn_nat *nat_entry =
++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
++ struct ovn_nat, ext_addr_list_node);
++ build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
++ }
+ }
+ }
+
+-/* Local router ingress table ARP_RESOLVE: ARP Resolution.
+- *
+- * Any unicast packet that reaches this table is an IP packet whose
+- * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
+- * (ip4.dst/ipv6.dst is the final destination).
+- * This table resolves the IP address in
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
+- * an Ethernet address in eth.dst.
+- */
++/* Logical router ingress table 3: IP Input for IPv4. */
+ static void
+-build_arp_resolve_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
+- struct hmap *ports,
+- struct ds *match, struct ds *actions)
++build_lrouter_ipv4_ip_input(struct ovn_port *op,
++ struct hmap *lflows,
++ struct ds *match, struct ds *actions)
+ {
+- if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
+- return;
+- }
++ /* No ingress packets are accepted on a chassisredirect
++ * port, so no need to program flows for that port. */
++ if (op->nbrp && (!op->derived)) {
++ if (op->lrp_networks.n_ipv4_addrs) {
++ /* L3 admission control: drop packets that originate from an
++ * IPv4 address owned by the router or a broadcast address
++ * known to the router (priority 100). */
++ ds_clear(match);
++ ds_put_cstr(match, "ip4.src == ");
++ op_put_v4_networks(match, op, true);
++ ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
++ ds_cstr(match), "drop;",
++ &op->nbrp->header_);
+
+- if (op->nbrp) {
+- /* This is a logical router port. If next-hop IP address in
+- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
+- * router port, then the packet is intended to eventually be sent
+- * to this logical port. Set the destination mac address using
+- * this port's mac address.
+- *
+- * The packet is still in peer's logical pipeline. So the match
+- * should be on peer's outport. */
+- if (op->peer && op->nbrp->peer) {
+- if (op->lrp_networks.n_ipv4_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 "== ",
+- op->peer->json_key);
+- op_put_v4_networks(match, op, false);
++ /* ICMP echo reply. These flows reply to ICMP echo requests
++ * received for the router's IP address. Since packets only
++ * get here as part of the logical router datapath, the inport
++ * (i.e. the incoming locally attached net) does not matter.
++ * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
++ ds_clear(match);
++ ds_put_cstr(match, "ip4.dst == ");
++ op_put_v4_networks(match, op, false);
++ ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- op->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, op->peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- }
++ const char * icmp_actions = "ip4.dst <-> ip4.src; "
++ "ip.ttl = 255; "
++ "icmp4.type = 0; "
++ "flags.loopback = 1; "
++ "next; ";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
++ ds_cstr(match), icmp_actions,
++ &op->nbrp->header_);
++ }
+
+- if (op->lrp_networks.n_ipv6_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV6 " == ",
+- op->peer->json_key);
+- op_put_v6_networks(match, op);
++ /* BFD msg handling */
++ build_lrouter_bfd_flows(lflows, op);
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- op->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, op->peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- }
++ /* ICMP time exceeded */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ ds_clear(match);
++ ds_clear(actions);
++
++ ds_put_format(match,
++ "inport == %s && ip4 && "
++ "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
++ ds_put_format(actions,
++ "icmp4 {"
++ "eth.dst <-> eth.src; "
++ "icmp4.type = 11; /* Time exceeded */ "
++ "icmp4.code = 0; /* TTL exceeded in transit */ "
++ "ip4.dst = ip4.src; "
++ "ip4.src = %s; "
++ "ip.ttl = 255; "
++ "next; };",
++ op->lrp_networks.ipv4_addrs[i].addr_s);
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
++ ds_cstr(match), ds_cstr(actions),
++ &op->nbrp->header_);
+ }
+
+- if (!op->derived && op->od->l3redirect_port) {
+- const char *redirect_type = smap_get(&op->nbrp->options,
+- "redirect-type");
+- if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
+- /* Packet is on a non gateway chassis and
+- * has an unresolved ARP on a network behind gateway
+- * chassis attached router port. Since, redirect type
+- * is "bridged", instead of calling "get_arp"
+- * on this node, we will redirect the packet to gateway
+- * chassis, by setting destination mac router port mac.*/
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- "!is_chassis_resident(%s)", op->json_key,
+- op->od->l3redirect_port->json_key);
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- op->lrp_networks.ea_s);
++ /* ARP reply. These flows reply to ARP requests for the router's own
++ * IP address. */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ ds_clear(match);
++ ds_put_format(match, "arp.spa == %s/%u",
++ op->lrp_networks.ipv4_addrs[i].network_s,
++ op->lrp_networks.ipv4_addrs[i].plen);
+
+- ovn_lflow_add_with_hint(lflows, op->od,
+- S_ROUTER_IN_ARP_RESOLVE, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
+- }
+- }
++ if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
++ && op->peer->od->n_localnet_ports) {
++ bool add_chassis_resident_check = false;
++ if (op == op->od->l3dgw_port) {
++ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
++ * should only be sent from the gateway chassis, so that
++ * upstream MAC learning points to the gateway chassis.
++ * Also need to avoid generation of multiple ARP responses
++ * from different chassis. */
++ add_chassis_resident_check = true;
++ } else {
++ /* Check if the option 'reside-on-redirect-chassis'
++ * is set to true on the router port. If set to true
++ * and if peer's logical switch has a localnet port, it
++ * means the router pipeline for the packets from
++ * peer's logical switch is run on the chassis
++ * hosting the gateway port and it should reply to the
++ * ARP requests for the router port IPs.
++ */
++ add_chassis_resident_check = smap_get_bool(
++ &op->nbrp->options,
++ "reside-on-redirect-chassis", false);
++ }
+
+- /* Drop IP traffic destined to router owned IPs. Part of it is dropped
+- * in stage "lr_in_ip_input" but traffic that could have been unSNATed
+- * but didn't match any existing session might still end up here.
+- *
+- * Priority 1.
+- */
+- build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
+- lflows);
+- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+- && strcmp(op->nbsp->type, "virtual")) {
+- /* This is a logical switch port that backs a VM or a container.
+- * Extract its addresses. For each of the address, go through all
+- * the router ports attached to the switch (to which this port
+- * connects) and if the address in question is reachable from the
+- * router port, add an ARP/ND entry in that router's pipeline. */
++ if (add_chassis_resident_check) {
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
++ }
++ }
+
+- for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+- const char *ea_s = op->lsp_addrs[i].ea_s;
+- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
+- const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
+- for (size_t k = 0; k < op->od->n_router_ports; k++) {
+- /* Get the Logical_Router_Port that the
+- * Logical_Switch_Port is connected to, as
+- * 'peer'. */
+- const char *peer_name = smap_get(
+- &op->od->router_ports[k]->nbsp->options,
+- "router-port");
+- if (!peer_name) {
+- continue;
+- }
++ build_lrouter_arp_flow(op->od, op,
++ op->lrp_networks.ipv4_addrs[i].addr_s,
++ REG_INPORT_ETH_ADDR, match, false, 90,
++ &op->nbrp->header_, lflows);
++ }
+
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
+- continue;
+- }
++ /* A set to hold all load-balancer vips that need ARP responses. */
++ struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
++ struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
++ get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+
+- if (!find_lrp_member_ip(peer, ip_s)) {
+- continue;
+- }
++ const char *ip_address;
++ SSET_FOR_EACH (ip_address, &all_ips_v4) {
++ ds_clear(match);
++ if (op == op->od->l3dgw_port) {
++ ds_put_format(match, "is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
++ }
+
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == %s",
+- peer->json_key, ip_s);
++ build_lrouter_arp_flow(op->od, op,
++ ip_address, REG_INPORT_ETH_ADDR,
++ match, false, 90, NULL, lflows);
++ }
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match),
+- ds_cstr(actions),
+- &op->nbsp->header_);
+- }
++ SSET_FOR_EACH (ip_address, &all_ips_v6) {
++ ds_clear(match);
++ if (op == op->od->l3dgw_port) {
++ ds_put_format(match, "is_chassis_resident(%s)",
++ op->od->l3redirect_port->json_key);
+ }
+
+- for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+- const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
+- for (size_t k = 0; k < op->od->n_router_ports; k++) {
+- /* Get the Logical_Router_Port that the
+- * Logical_Switch_Port is connected to, as
+- * 'peer'. */
+- const char *peer_name = smap_get(
+- &op->od->router_ports[k]->nbsp->options,
+- "router-port");
+- if (!peer_name) {
+- continue;
+- }
++ build_lrouter_nd_flow(op->od, op, "nd_na",
++ ip_address, NULL, REG_INPORT_ETH_ADDR,
++ match, false, 90, NULL, lflows);
++ }
+
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
+- continue;
+- }
++ sset_destroy(&all_ips_v4);
++ sset_destroy(&all_ips_v6);
+
+- if (!find_lrp_member_ip(peer, ip_s)) {
+- continue;
+- }
++ if (!smap_get(&op->od->nbr->options, "chassis")
++ && !op->od->l3dgw_port) {
++ /* UDP/TCP/SCTP port unreachable. */
++ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
++ ds_clear(match);
++ ds_put_format(match,
++ "ip4 && ip4.dst == %s && !ip.later_frag && udp",
++ op->lrp_networks.ipv4_addrs[i].addr_s);
++ const char *action = "icmp4 {"
++ "eth.dst <-> eth.src; "
++ "ip4.dst <-> ip4.src; "
++ "ip.ttl = 255; "
++ "icmp4.type = 3; "
++ "icmp4.code = 3; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 80, ds_cstr(match), action,
++ &op->nbrp->header_);
+
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV6 " == %s",
+- peer->json_key, ip_s);
++ ds_clear(match);
++ ds_put_format(match,
++ "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
++ op->lrp_networks.ipv4_addrs[i].addr_s);
++ action = "tcp_reset {"
++ "eth.dst <-> eth.src; "
++ "ip4.dst <-> ip4.src; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 80, ds_cstr(match), action,
++ &op->nbrp->header_);
+
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match),
+- ds_cstr(actions),
+- &op->nbsp->header_);
+- }
++ ds_clear(match);
++ ds_put_format(match,
++ "ip4 && ip4.dst == %s && !ip.later_frag && sctp",
++ op->lrp_networks.ipv4_addrs[i].addr_s);
++ action = "sctp_abort {"
++ "eth.dst <-> eth.src; "
++ "ip4.dst <-> ip4.src; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 80, ds_cstr(match), action,
++ &op->nbrp->header_);
++
++ ds_clear(match);
++ ds_put_format(match,
++ "ip4 && ip4.dst == %s && !ip.later_frag",
++ op->lrp_networks.ipv4_addrs[i].addr_s);
++ action = "icmp4 {"
++ "eth.dst <-> eth.src; "
++ "ip4.dst <-> ip4.src; "
++ "ip.ttl = 255; "
++ "icmp4.type = 3; "
++ "icmp4.code = 2; "
++ "next; };";
++ ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
++ 70, ds_cstr(match), action,
++ &op->nbrp->header_);
+ }
+ }
+- } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+- && !strcmp(op->nbsp->type, "virtual")) {
+- /* This is a virtual port. Add ARP replies for the virtual ip with
+- * the mac of the present active virtual parent.
+- * If the logical port doesn't have virtual parent set in
+- * Port_Binding table, then add the flow to set eth.dst to
+- * 00:00:00:00:00:00 and advance to next table so that ARP is
+- * resolved by router pipeline using the arp{} action.
+- * The MAC_Binding entry for the virtual ip might be invalid. */
+- ovs_be32 ip;
+
+- const char *vip = smap_get(&op->nbsp->options,
+- "virtual-ip");
+- const char *virtual_parents = smap_get(&op->nbsp->options,
+- "virtual-parents");
+- if (!vip || !virtual_parents ||
+- !ip_parse(vip, &ip) || !op->sb) {
++ /* Drop IP traffic destined to router owned IPs except if the IP is
++ * also a SNAT IP. Those are dropped later, in stage
++ * "lr_in_arp_resolve", if unSNAT was unsuccessful.
++ *
++ * If op->od->lb_force_snat_router_ip is true, it means the IP of the
++ * router port is also SNAT IP.
++ *
++ * Priority 60.
++ */
++ if (!op->od->lb_force_snat_router_ip) {
++ build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
++ lflows);
++ }
++ /* ARP / ND handling for external IP addresses.
++ *
++ * DNAT and SNAT IP addresses are external IP addresses that need ARP
++ * handling.
++ *
++ * These are already taken care of globally, per router. The only
++ * exception is on the l3dgw_port where we might need to use a
++ * different ETH address.
++ */
++ if (op != op->od->l3dgw_port) {
+ return;
+ }
+
+- if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
+- !op->sb->chassis) {
+- /* The virtual port is not claimed yet. */
+- for (size_t i = 0; i < op->od->n_router_ports; i++) {
+- const char *peer_name = smap_get(
+- &op->od->router_ports[i]->nbsp->options,
+- "router-port");
+- if (!peer_name) {
+- continue;
+- }
+-
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
+- continue;
+- }
+-
+- if (find_lrp_member_ip(peer, vip)) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == %s",
+- peer->json_key, vip);
++ for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
++ struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+
+- const char *arp_actions =
+- "eth.dst = 00:00:00:00:00:00; next;";
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match),
+- arp_actions,
+- &op->nbsp->header_);
+- break;
+- }
+- }
+- } else {
+- struct ovn_port *vp =
+- ovn_port_find(ports, op->sb->virtual_parent);
+- if (!vp || !vp->nbsp) {
+- return;
++ /* Skip entries we failed to parse. */
++ if (!nat_entry_is_valid(nat_entry)) {
++ continue;
+ }
+
+- for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
+- bool found_vip_network = false;
+- const char *ea_s = vp->lsp_addrs[i].ea_s;
+- for (size_t j = 0; j < vp->od->n_router_ports; j++) {
+- /* Get the Logical_Router_Port that the
+- * Logical_Switch_Port is connected to, as
+- * 'peer'. */
+- const char *peer_name = smap_get(
+- &vp->od->router_ports[j]->nbsp->options,
+- "router-port");
+- if (!peer_name) {
+- continue;
+- }
++ /* Skip SNAT entries for now, we handle unique SNAT IPs separately
++ * below.
++ */
++ if (!strcmp(nat_entry->nb->type, "snat")) {
++ continue;
++ }
++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
++ }
+
+- struct ovn_port *peer =
+- ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
+- continue;
+- }
++ /* Now handle SNAT entries too, one per unique SNAT IP. */
++ struct shash_node *snat_snode;
++ SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
++ struct ovn_snat_ip *snat_ip = snat_snode->data;
+
+- if (!find_lrp_member_ip(peer, vip)) {
+- continue;
+- }
++ if (ovs_list_is_empty(&snat_ip->snat_entries)) {
++ continue;
++ }
+
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == %s",
+- peer->json_key, vip);
++ struct ovn_nat *nat_entry =
++ CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
++ struct ovn_nat, ext_addr_list_node);
++ build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
++ }
++ }
++}
+
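A minimal sketch (not part of the patch) of the ICMPv4 echo-reply flow emitted above, assuming a hypothetical router port that owns the single address 192.0.2.1; the address is invented and the stage hint is omitted:

    /* Sketch only: pings to the router's own address are answered inside
     * the router pipeline, swapping source and destination. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                  "ip4.dst == 192.0.2.1 && icmp4.type == 8 && icmp4.code == 0",
                  "ip4.dst <-> ip4.src; "
                  "ip.ttl = 255; "
                  "icmp4.type = 0; "
                  "flags.loopback = 1; "
                  "next; ");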
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match),
+- ds_cstr(actions),
+- &op->nbsp->header_);
+- found_vip_network = true;
+- break;
+- }
++/* NAT, Defrag and load balancing. */
++static void
++build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od,
++ struct hmap *lflows,
++ struct shash *meter_groups,
++ struct hmap *lbs,
++ struct ds *match, struct ds *actions)
++{
++ if (od->nbr) {
+
+- if (found_vip_network) {
+- break;
+- }
+- }
+- }
+- } else if (lsp_is_router(op->nbsp)) {
+- /* This is a logical switch port that connects to a router. */
++ /* Packets are allowed by default. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
+
+- /* The peer of this switch port is the router port for which
+- * we need to add logical flows such that it can resolve
+- * ARP entries for all the other router ports connected to
+- * the switch in question. */
++ /* Send the IPv6 NS packets to the next table. When ovn-controller
++ * generates an IPv6 NS (for the nd_ns{} action), the injected
++ * packet would otherwise go through conntrack, which is not required. */
++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
+
+- const char *peer_name = smap_get(&op->nbsp->options,
+- "router-port");
+- if (!peer_name) {
++ /* NAT rules are only valid on Gateway routers and routers with
++ * l3dgw_port (router has a port with gateway chassis
++ * specified). */
++ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+ return;
+ }
+
+- struct ovn_port *peer = ovn_port_find(ports, peer_name);
+- if (!peer || !peer->nbrp) {
+- return;
+- }
++ struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
+
+- if (peer->od->nbr &&
+- smap_get_bool(&peer->od->nbr->options,
+- "dynamic_neigh_routers", false)) {
+- return;
+- }
++ bool dnat_force_snat_ip =
++ !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
++ bool lb_force_snat_ip =
++ !lport_addresses_is_empty(&od->lb_force_snat_addrs);
+
+- for (size_t i = 0; i < op->od->n_router_ports; i++) {
+- const char *router_port_name = smap_get(
+- &op->od->router_ports[i]->nbsp->options,
+- "router-port");
+- struct ovn_port *router_port = ovn_port_find(ports,
+- router_port_name);
+- if (!router_port || !router_port->nbrp) {
++ for (int i = 0; i < od->nbr->n_nat; i++) {
++ const struct nbrec_nat *nat;
++
++ nat = od->nbr->nat[i];
++
++ ovs_be32 ip, mask;
++ struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
++ bool is_v6 = false;
++ bool stateless = lrouter_nat_is_stateless(nat);
++ struct nbrec_address_set *allowed_ext_ips =
++ nat->allowed_ext_ips;
++ struct nbrec_address_set *exempted_ext_ips =
++ nat->exempted_ext_ips;
++
++ if (allowed_ext_ips && exempted_ext_ips) {
++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
++ VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
++ "both allowed and exempt external ips set",
++ UUID_ARGS(&(nat->header_.uuid)));
+ continue;
+ }
+
+- /* Skip the router port under consideration. */
+- if (router_port == peer) {
+- continue;
++ char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
++ if (error || mask != OVS_BE32_MAX) {
++ free(error);
++ error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
++ if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
++ /* Invalid for both IPv4 and IPv6 */
++ static struct vlog_rate_limit rl =
++ VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "bad external ip %s for nat",
++ nat->external_ip);
++ free(error);
++ continue;
++ }
++ /* It was an invalid IPv4 address, but valid IPv6.
++ * Treat the rest of the handling of this NAT rule
++ * as IPv6. */
++ is_v6 = true;
+ }
+
+- if (router_port->lrp_networks.n_ipv4_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV4 " == ",
+- peer->json_key);
+- op_put_v4_networks(match, router_port, false);
+-
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- router_port->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbsp->header_);
++ /* Check the validity of nat->logical_ip. 'logical_ip' can
++ * be a subnet when the type is "snat". */
++ int cidr_bits;
++ if (is_v6) {
++ error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
++ cidr_bits = ipv6_count_cidr_bits(&mask_v6);
++ } else {
++ error = ip_parse_masked(nat->logical_ip, &ip, &mask);
++ cidr_bits = ip_count_cidr_bits(mask);
++ }
++ if (!strcmp(nat->type, "snat")) {
++ if (error) {
++ /* Invalid for both IPv4 and IPv6 */
++ static struct vlog_rate_limit rl =
++ VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
++ "in router "UUID_FMT"",
++ nat->logical_ip, UUID_ARGS(&od->key));
++ free(error);
++ continue;
++ }
++ } else {
++ if (error || (!is_v6 && mask != OVS_BE32_MAX)
++ || (is_v6 && memcmp(&mask_v6, &v6_exact,
++ sizeof mask_v6))) {
++ /* Invalid for both IPv4 and IPv6 */
++ static struct vlog_rate_limit rl =
++ VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
++ ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
++ free(error);
++ continue;
++ }
+ }
+
+- if (router_port->lrp_networks.n_ipv6_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "outport == %s && "
+- REG_NEXT_HOP_IPV6 " == ",
+- peer->json_key);
+- op_put_v6_networks(match, router_port);
+-
+- ds_clear(actions);
+- ds_put_format(actions, "eth.dst = %s; next;",
+- router_port->lrp_networks.ea_s);
+- ovn_lflow_add_with_hint(lflows, peer->od,
+- S_ROUTER_IN_ARP_RESOLVE, 100,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbsp->header_);
++ /* For distributed router NAT, determine whether this NAT rule
++ * satisfies the conditions for distributed NAT processing. */
++ bool distributed = false;
++ struct eth_addr mac;
++ if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
++ nat->logical_port && nat->external_mac) {
++ if (eth_addr_from_string(nat->external_mac, &mac)) {
++ distributed = true;
++ } else {
++ static struct vlog_rate_limit rl =
++ VLOG_RATE_LIMIT_INIT(5, 1);
++ VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
++ ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
++ continue;
++ }
+ }
+- }
+- }
+
+-}
++ /* Ingress UNSNAT table: It is for already established connections'
++ * reverse traffic. i.e., SNAT has already been done in egress
++ * pipeline and now the packet has entered the ingress pipeline as
++ * part of a reply. We undo the SNAT here.
++ *
++ * Undoing SNAT has to happen before DNAT processing. This is
++ * because when the packet was DNATed in ingress pipeline, it did
++ * not know about the possibility of eventual additional SNAT in
++ * egress pipeline. */
++ if (!strcmp(nat->type, "snat")
++ || !strcmp(nat->type, "dnat_and_snat")) {
++ if (!od->l3dgw_port) {
++ /* Gateway router. */
++ ds_clear(match);
++ ds_clear(actions);
++ ds_put_format(match, "ip && ip%s.dst == %s",
++ is_v6 ? "6" : "4",
++ nat->external_ip);
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "ip%s.dst=%s; next;",
++ is_v6 ? "6" : "4", nat->logical_ip);
++ } else {
++ ds_put_cstr(actions, "ct_snat;");
++ }
+
+-/* Local router ingress table CHK_PKT_LEN: Check packet length.
+- *
+- * Any IPv4 packet with outport set to the distributed gateway
+- * router port, check the packet length and store the result in the
+- * 'REGBIT_PKT_LARGER' register bit.
+- *
+- * Local router ingress table LARGER_PKTS: Handle larger packets.
+- *
+- * Any IPv4 packet with outport set to the distributed gateway
+- * router port and the 'REGBIT_PKT_LARGER' register bit is set,
+- * generate ICMPv4 packet with type 3 (Destination Unreachable) and
+- * code 4 (Fragmentation needed).
+- * */
+-static void
+-build_check_pkt_len_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct hmap *ports,
+- struct ds *match, struct ds *actions)
+-{
+- if (od->nbr) {
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
++ 90, ds_cstr(match),
++ ds_cstr(actions),
++ &nat->header_);
++ } else {
++ /* Distributed router. */
+
+- /* Packets are allowed by default. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
+- "next;");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
+- "next;");
++ /* Traffic received on l3dgw_port is subject to NAT. */
++ ds_clear(match);
++ ds_clear(actions);
++ ds_put_format(match, "ip && ip%s.dst == %s"
++ " && inport == %s",
++ is_v6 ? "6" : "4",
++ nat->external_ip,
++ od->l3dgw_port->json_key);
++ if (!distributed && od->l3redirect_port) {
++ /* Flows for NAT rules that are centralized are only
++ * programmed on the gateway chassis. */
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ od->l3redirect_port->json_key);
++ }
+
+- if (od->l3dgw_port && od->l3redirect_port) {
+- int gw_mtu = 0;
+- if (od->l3dgw_port->nbrp) {
+- gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
+- "gateway_mtu", 0);
+- }
+- /* Add the flows only if gateway_mtu is configured. */
+- if (gw_mtu <= 0) {
+- return;
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "ip%s.dst=%s; next;",
++ is_v6 ? "6" : "4", nat->logical_ip);
++ } else {
++ ds_put_cstr(actions, "ct_snat;");
++ }
++
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
++ 100,
++ ds_cstr(match), ds_cstr(actions),
++ &nat->header_);
++ }
+ }
+
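A minimal sketch (not part of the patch) of the UNSNAT flow the gateway-router branch above emits, assuming a hypothetical stateful NAT rule whose external_ip is 203.0.113.10; the address is invented and the stage hint is omitted:

    /* Sketch only: reply traffic arriving for the NAT external address has
     * its earlier SNAT undone before DNAT processing. */
    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
                  "ip && ip4.dst == 203.0.113.10",
                  "ct_snat;");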
+- ds_clear(match);
+- ds_put_format(match, "outport == %s", od->l3dgw_port->json_key);
++ /* Ingress DNAT table: Packets enter the pipeline with destination
++ * IP address that needs to be DNATted from an external IP address
++ * to a logical IP address. */
++ if (!strcmp(nat->type, "dnat")
++ || !strcmp(nat->type, "dnat_and_snat")) {
++ if (!od->l3dgw_port) {
++ /* Gateway router. */
++ /* Packet when it goes from the initiator to destination.
++ * We need to set flags.loopback because the router can
++ * send the packet back through the same interface. */
++ ds_clear(match);
++ ds_put_format(match, "ip && ip%s.dst == %s",
++ is_v6 ? "6" : "4",
++ nat->external_ip);
++ ds_clear(actions);
++ if (allowed_ext_ips || exempted_ext_ips) {
++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
++ is_v6, true, mask);
++ }
++
++ if (dnat_force_snat_ip) {
++ /* Indicate to the future tables that a DNAT has taken
++ * place and a force SNAT needs to be done in the
++ * Egress SNAT table. */
++ ds_put_format(actions,
++ "flags.force_snat_for_dnat = 1; ");
++ }
+
+- ds_clear(actions);
+- ds_put_format(actions,
+- REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
+- " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &od->l3dgw_port->nbrp->header_);
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "flags.loopback = 1; "
++ "ip%s.dst=%s; next;",
++ is_v6 ? "6" : "4", nat->logical_ip);
++ } else {
++ ds_put_format(actions, "flags.loopback = 1; "
++ "ct_dnat(%s", nat->logical_ip);
+
+- for (size_t i = 0; i < od->nbr->n_ports; i++) {
+- struct ovn_port *rp = ovn_port_find(ports,
+- od->nbr->ports[i]->name);
+- if (!rp || rp == od->l3dgw_port) {
+- continue;
+- }
++ if (nat->external_port_range[0]) {
++ ds_put_format(actions, ",%s",
++ nat->external_port_range);
++ }
++ ds_put_format(actions, ");");
++ }
+
+- if (rp->lrp_networks.ipv4_addrs) {
+- ds_clear(match);
+- ds_put_format(match, "inport == %s && outport == %s"
+- " && ip4 && "REGBIT_PKT_LARGER,
+- rp->json_key, od->l3dgw_port->json_key);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &nat->header_);
++ } else {
++ /* Distributed router. */
+
++ /* Traffic received on l3dgw_port is subject to NAT. */
++ ds_clear(match);
++ ds_put_format(match, "ip && ip%s.dst == %s"
++ " && inport == %s",
++ is_v6 ? "6" : "4",
++ nat->external_ip,
++ od->l3dgw_port->json_key);
++ if (!distributed && od->l3redirect_port) {
++ /* Flows for NAT rules that are centralized are only
++ * programmed on the gateway chassis. */
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ od->l3redirect_port->json_key);
++ }
+ ds_clear(actions);
+- /* Set icmp4.frag_mtu to gw_mtu */
+- ds_put_format(actions,
+- "icmp4_error {"
+- REGBIT_EGRESS_LOOPBACK" = 1; "
+- "eth.dst = %s; "
+- "ip4.dst = ip4.src; "
+- "ip4.src = %s; "
+- "ip.ttl = 255; "
+- "icmp4.type = 3; /* Destination Unreachable. */ "
+- "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+- "icmp4.frag_mtu = %d; "
+- "next(pipeline=ingress, table=%d); };",
+- rp->lrp_networks.ea_s,
+- rp->lrp_networks.ipv4_addrs[0].addr_s,
+- gw_mtu,
+- ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+- ovn_lflow_add_with_hint(lflows, od,
+- S_ROUTER_IN_LARGER_PKTS, 50,
++ if (allowed_ext_ips || exempted_ext_ips) {
++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
++ is_v6, true, mask);
++ }
++
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "ip%s.dst=%s; next;",
++ is_v6 ? "6" : "4", nat->logical_ip);
++ } else {
++ ds_put_format(actions, "ct_dnat(%s", nat->logical_ip);
++ if (nat->external_port_range[0]) {
++ ds_put_format(actions, ",%s",
++ nat->external_port_range);
++ }
++ ds_put_format(actions, ");");
++ }
++
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+ ds_cstr(match), ds_cstr(actions),
+- &rp->nbrp->header_);
++ &nat->header_);
+ }
++ }
+
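A minimal sketch (not part of the patch) of the DNAT flow the gateway-router branch above emits, assuming a hypothetical stateful dnat_and_snat rule mapping external 203.0.113.10 to logical 10.0.0.10, with no external port range, no allowed/exempted external IPs and no dnat_force_snat address; all values are invented and the stage hint is omitted:

    /* Sketch only: translate the external address to the logical one;
     * flags.loopback lets the reply hairpin through the same interface. */
    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
                  "ip && ip4.dst == 203.0.113.10",
                  "flags.loopback = 1; ct_dnat(10.0.0.10);");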
+- if (rp->lrp_networks.ipv6_addrs) {
++ /* ARP resolve for NAT IPs. */
++ if (od->l3dgw_port) {
++ if (!strcmp(nat->type, "snat")) {
+ ds_clear(match);
+- ds_put_format(match, "inport == %s && outport == %s"
+- " && ip6 && "REGBIT_PKT_LARGER,
+- rp->json_key, od->l3dgw_port->json_key);
++ ds_put_format(
++ match, "inport == %s && %s == %s",
++ od->l3dgw_port->json_key,
++ is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
++ 120, ds_cstr(match), "next;",
++ &nat->header_);
++ }
+
++ if (!sset_contains(&nat_entries, nat->external_ip)) {
++ ds_clear(match);
++ ds_put_format(
++ match, "outport == %s && %s == %s",
++ od->l3dgw_port->json_key,
++ is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
++ nat->external_ip);
+ ds_clear(actions);
+- /* Set icmp6.frag_mtu to gw_mtu */
+- ds_put_format(actions,
+- "icmp6_error {"
+- REGBIT_EGRESS_LOOPBACK" = 1; "
+- "eth.dst = %s; "
+- "ip6.dst = ip6.src; "
+- "ip6.src = %s; "
+- "ip.ttl = 255; "
+- "icmp6.type = 2; /* Packet Too Big. */ "
+- "icmp6.code = 0; "
+- "icmp6.frag_mtu = %d; "
+- "next(pipeline=ingress, table=%d); };",
+- rp->lrp_networks.ea_s,
+- rp->lrp_networks.ipv6_addrs[0].addr_s,
+- gw_mtu,
+- ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
++ ds_put_format(
++ actions, "eth.dst = %s; next;",
++ distributed ? nat->external_mac :
++ od->l3dgw_port->lrp_networks.ea_s);
+ ovn_lflow_add_with_hint(lflows, od,
+- S_ROUTER_IN_LARGER_PKTS, 50,
+- ds_cstr(match), ds_cstr(actions),
+- &rp->nbrp->header_);
++ S_ROUTER_IN_ARP_RESOLVE,
++ 100, ds_cstr(match),
++ ds_cstr(actions),
++ &nat->header_);
++ sset_add(&nat_entries, nat->external_ip);
+ }
++ } else {
++ /* Add the NAT external_ip to the nat_entries even for
++ * gateway routers. This is required for adding load balancer
++ * flows.*/
++ sset_add(&nat_entries, nat->external_ip);
+ }
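A similar hedged sketch of the ARP-resolve handling described above, again with assumed values; the first flow is only added for rules of type "snat", and the second is added once per unique external IP because the nat_entries set suppresses duplicates:

    #include <stdio.h>

    int
    main(void)
    {
        /* Assumed values: "lr0-public" stands in for the gateway port's
         * json_key and the MAC for its lrp_networks.ea_s. */
        const char *external_ip = "172.16.0.10";
        const char *l3dgw_port = "\"lr0-public\"";
        const char *port_mac = "00:00:00:00:ff:01";

        printf("S_ROUTER_IN_IP_INPUT    prio 120: "
               "inport == %s && ip4.src == %s -> next;\n",
               l3dgw_port, external_ip);
        /* REG_NEXT_HOP_IPV4 is shown symbolically; it expands to a concrete
         * register when the match string is actually built. */
        printf("S_ROUTER_IN_ARP_RESOLVE prio 100: "
               "outport == %s && REG_NEXT_HOP_IPV4 == %s -> eth.dst = %s; next;\n",
               l3dgw_port, external_ip, port_mac);
        return 0;
    }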
+- }
+- }
+-}
+-
+-/* Logical router ingress table GW_REDIRECT: Gateway redirect.
+- *
+- * For traffic with outport equal to the l3dgw_port
+- * on a distributed router, this table redirects a subset
+- * of the traffic to the l3redirect_port which represents
+- * the central instance of the l3dgw_port.
+- */
+-static void
+-build_gateway_redirect_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (od->nbr) {
+- if (od->l3dgw_port && od->l3redirect_port) {
+- const struct ovsdb_idl_row *stage_hint = NULL;
+-
+- if (od->l3dgw_port->nbrp) {
+- stage_hint = &od->l3dgw_port->nbrp->header_;
+- }
+-
+- /* For traffic with outport == l3dgw_port, if the
+- * packet did not match any higher priority redirect
+- * rule, then the traffic is redirected to the central
+- * instance of the l3dgw_port. */
+- ds_clear(match);
+- ds_put_format(match, "outport == %s",
+- od->l3dgw_port->json_key);
+- ds_clear(actions);
+- ds_put_format(actions, "outport = %s; next;",
+- od->l3redirect_port->json_key);
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
+- ds_cstr(match), ds_cstr(actions),
+- stage_hint);
+- }
+
+- /* Packets are allowed by default. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
+- }
+-}
++ /* Egress UNDNAT table: It is for already established connections'
++     * reverse traffic, i.e., DNAT has already been done in the ingress
++ * pipeline and now the packet has entered the egress pipeline as
++ * part of a reply. We undo the DNAT here.
++ *
++ * Note that this only applies for NAT on a distributed router.
++ * Undo DNAT on a gateway router is done in the ingress DNAT
++ * pipeline stage. */
++ if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
++ || !strcmp(nat->type, "dnat_and_snat"))) {
++ ds_clear(match);
++ ds_put_format(match, "ip && ip%s.src == %s"
++ " && outport == %s",
++ is_v6 ? "6" : "4",
++ nat->logical_ip,
++ od->l3dgw_port->json_key);
++ if (!distributed && od->l3redirect_port) {
++ /* Flows for NAT rules that are centralized are only
++ * programmed on the gateway chassis. */
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ od->l3redirect_port->json_key);
++ }
++ ds_clear(actions);
++ if (distributed) {
++ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
++ ETH_ADDR_ARGS(mac));
++ }
+
+-/* Local router ingress table ARP_REQUEST: ARP request.
+- *
+- * In the common case where the Ethernet destination has been resolved,
+- * this table outputs the packet (priority 0). Otherwise, it composes
+- * and sends an ARP/IPv6 NA request (priority 100). */
+-static void
+-build_arp_request_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (od->nbr) {
+- for (int i = 0; i < od->nbr->n_static_routes; i++) {
+- const struct nbrec_logical_router_static_route *route;
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "ip%s.src=%s; next;",
++ is_v6 ? "6" : "4", nat->external_ip);
++ } else {
++ ds_put_format(actions, "ct_dnat;");
++ }
+
+- route = od->nbr->static_routes[i];
+- struct in6_addr gw_ip6;
+- unsigned int plen;
+- char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
+- if (error || plen != 128) {
+- free(error);
+- continue;
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &nat->header_);
+ }
+
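To round out the picture, a small assumed-value sketch of the egress UNDNAT match generated above for the reverse direction of the same hypothetical rule, including the choice between the stateless rewrite and ct_dnat:

    #include <stdbool.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Hypothetical values, mirroring the dnat_and_snat rule used earlier. */
        const char *logical_ip = "10.0.0.5";
        const char *external_ip = "172.16.0.10";
        const char *l3dgw_port = "\"lr0-public\"";
        bool stateless = false;   /* true when the rule is configured stateless */

        char match[256], action[128];
        snprintf(match, sizeof match, "ip && ip4.src == %s && outport == %s",
                 logical_ip, l3dgw_port);
        if (stateless) {
            snprintf(action, sizeof action, "ip4.src=%s; next;", external_ip);
        } else {
            snprintf(action, sizeof action, "ct_dnat;");
        }

        printf("S_ROUTER_OUT_UNDNAT prio 100: match=%s action=%s\n", match, action);
        return 0;
    }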
+- ds_clear(match);
+- ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
+- "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
+- route->nexthop);
+- struct in6_addr sn_addr;
+- struct eth_addr eth_dst;
+- in6_addr_solicited_node(&sn_addr, &gw_ip6);
+- ipv6_multicast_to_ethernet(ð_dst, &sn_addr);
+-
+- char sn_addr_s[INET6_ADDRSTRLEN + 1];
+- ipv6_string_mapped(sn_addr_s, &sn_addr);
+-
+- ds_clear(actions);
+- ds_put_format(actions,
+- "nd_ns { "
+- "eth.dst = "ETH_ADDR_FMT"; "
+- "ip6.dst = %s; "
+- "nd.target = %s; "
+- "output; "
+- "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
+- route->nexthop);
+-
+- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
+- ds_cstr(match), ds_cstr(actions),
+- &route->header_);
+- }
+-
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+- "eth.dst == 00:00:00:00:00:00 && ip4",
+- "arp { "
+- "eth.dst = ff:ff:ff:ff:ff:ff; "
+- "arp.spa = " REG_SRC_IPV4 "; "
+- "arp.tpa = " REG_NEXT_HOP_IPV4 "; "
+- "arp.op = 1; " /* ARP request */
+- "output; "
+- "};");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+- "eth.dst == 00:00:00:00:00:00 && ip6",
+- "nd_ns { "
+- "nd.target = " REG_NEXT_HOP_IPV6 "; "
+- "output; "
+- "};");
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
+- }
+-}
++ /* Egress SNAT table: Packets enter the egress pipeline with
++     * source ip address that needs to be SNATted to an external ip
++ * address. */
++ if (!strcmp(nat->type, "snat")
++ || !strcmp(nat->type, "dnat_and_snat")) {
++ if (!od->l3dgw_port) {
++ /* Gateway router. */
++ ds_clear(match);
++ ds_put_format(match, "ip && ip%s.src == %s",
++ is_v6 ? "6" : "4",
++ nat->logical_ip);
++ ds_clear(actions);
+
+-/* Logical router egress table DELIVERY: Delivery (priority 100-110).
+- *
+- * Priority 100 rules deliver packets to enabled logical ports.
+- * Priority 110 rules match multicast packets and update the source
+- * mac before delivering to enabled logical ports. IP multicast traffic
+- * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
+- */
+-static void
+-build_egress_delivery_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (op->nbrp) {
+- if (!lrport_is_enabled(op->nbrp)) {
+- /* Drop packets to disabled logical ports (since logical flow
+- * tables are default-drop). */
+- return;
+- }
++ if (allowed_ext_ips || exempted_ext_ips) {
++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
++ is_v6, false, mask);
++ }
+
+- if (op->derived) {
+- /* No egress packets should be processed in the context of
+- * a chassisredirect port. The chassisredirect port should
+- * be replaced by the l3dgw port in the local output
+- * pipeline stage before egress processing. */
+- return;
+- }
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "ip%s.src=%s; next;",
++ is_v6 ? "6" : "4", nat->external_ip);
++ } else {
++ ds_put_format(actions, "ct_snat(%s",
++ nat->external_ip);
+
+- /* If multicast relay is enabled then also adjust source mac for IP
+- * multicast traffic.
+- */
+- if (op->od->mcast_info.rtr.relay) {
+- ds_clear(match);
+- ds_clear(actions);
+- ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s",
+- op->json_key);
+- ds_put_format(actions, "eth.src = %s; output;",
+- op->lrp_networks.ea_s);
+- ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
+- ds_cstr(match), ds_cstr(actions));
+- }
++ if (nat->external_port_range[0]) {
++ ds_put_format(actions, ",%s",
++ nat->external_port_range);
++ }
++ ds_put_format(actions, ");");
++ }
+
+- ds_clear(match);
+- ds_put_format(match, "outport == %s", op->json_key);
+- ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
+- ds_cstr(match), "output;");
+- }
++ /* The priority here is calculated such that the
++ * nat->logical_ip with the longest mask gets a higher
++ * priority. */
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
++ cidr_bits + 1,
++ ds_cstr(match), ds_cstr(actions),
++ &nat->header_);
++ } else {
++ uint16_t priority = cidr_bits + 1;
+
+-}
++ /* Distributed router. */
++ ds_clear(match);
++ ds_put_format(match, "ip && ip%s.src == %s"
++ " && outport == %s",
++ is_v6 ? "6" : "4",
++ nat->logical_ip,
++ od->l3dgw_port->json_key);
++ if (!distributed && od->l3redirect_port) {
++ /* Flows for NAT rules that are centralized are only
++ * programmed on the gateway chassis. */
++ priority += 128;
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ od->l3redirect_port->json_key);
++ }
++ ds_clear(actions);
+
+-static void
+-build_misc_local_traffic_drop_flows_for_lrouter(
+- struct ovn_datapath *od, struct hmap *lflows)
+-{
+- if (od->nbr) {
+- /* L3 admission control: drop multicast and broadcast source, localhost
+- * source or destination, and zero network source or destination
+- * (priority 100). */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
+- "ip4.src_mcast ||"
+- "ip4.src == 255.255.255.255 || "
+- "ip4.src == 127.0.0.0/8 || "
+- "ip4.dst == 127.0.0.0/8 || "
+- "ip4.src == 0.0.0.0/8 || "
+- "ip4.dst == 0.0.0.0/8",
+- "drop;");
++ if (allowed_ext_ips || exempted_ext_ips) {
++ lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
++ is_v6, false, mask);
++ }
+
+- /* Drop ARP packets (priority 85). ARP request packets for router's own
+- * IPs are handled with priority-90 flows.
+- * Drop IPv6 ND packets (priority 85). ND NA packets for router's own
+- * IPs are handled with priority-90 flows.
+- */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
+- "arp || nd", "drop;");
++ if (distributed) {
++ ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
++ ETH_ADDR_ARGS(mac));
++ }
+
+- /* Allow IPv6 multicast traffic that's supposed to reach the
+- * router pipeline (e.g., router solicitations).
+- */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra",
+- "next;");
++ if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
++ ds_put_format(actions, "ip%s.src=%s; next;",
++ is_v6 ? "6" : "4", nat->external_ip);
++ } else {
++ ds_put_format(actions, "ct_snat(%s",
++ nat->external_ip);
++ if (nat->external_port_range[0]) {
++ ds_put_format(actions, ",%s",
++ nat->external_port_range);
++ }
++ ds_put_format(actions, ");");
++ }
+
+- /* Drop other reserved multicast. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83,
+- "ip6.mcast_rsvd", "drop;");
++ /* The priority here is calculated such that the
++ * nat->logical_ip with the longest mask gets a higher
++ * priority. */
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
++ priority, ds_cstr(match),
++ ds_cstr(actions),
++ &nat->header_);
++ }
++ }
+
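The priority arithmetic above can be made concrete with a short standalone sketch; the prefix lengths are assumed examples, and the +128 bump corresponds to the centralized case that also appends the is_chassis_resident() clause:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Longest-prefix logical_ip wins: priority = prefix length + 1,
     * plus 128 when the flow is pinned to the gateway chassis. */
    static uint16_t
    snat_priority(unsigned int cidr_bits, bool centralized)
    {
        uint16_t priority = cidr_bits + 1;
        if (centralized) {
            priority += 128;
        }
        return priority;
    }

    int
    main(void)
    {
        printf("10.0.0.0/24 distributed: %u\n", snat_priority(24, false)); /* 25 */
        printf("10.0.0.5/32 distributed: %u\n", snat_priority(32, false)); /* 33 */
        printf("10.0.0.5/32 centralized: %u\n", snat_priority(32, true));  /* 161 */
        return 0;
    }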
+- /* Allow other multicast if relay enabled (priority 82). */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
+- "ip4.mcast || ip6.mcast",
+- od->mcast_info.rtr.relay ? "next;" : "drop;");
++ /* Logical router ingress table 0:
++ * For NAT on a distributed router, add rules allowing
++ * ingress traffic with eth.dst matching nat->external_mac
++ * on the l3dgw_port instance where nat->logical_port is
++ * resident. */
++ if (distributed) {
++ /* Store the ethernet address of the port receiving the packet.
++ * This will save us from having to match on inport further
++ * down in the pipeline.
++ */
++ ds_clear(actions);
++ ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
++ od->l3dgw_port->lrp_networks.ea_s);
+
+- /* Drop Ethernet local broadcast. By definition this traffic should
+- * not be forwarded.*/
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
+- "eth.bcast", "drop;");
++ ds_clear(match);
++ ds_put_format(match,
++ "eth.dst == "ETH_ADDR_FMT" && inport == %s"
++ " && is_chassis_resident(\"%s\")",
++ ETH_ADDR_ARGS(mac),
++ od->l3dgw_port->json_key,
++ nat->logical_port);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
++ ds_cstr(match), ds_cstr(actions),
++ &nat->header_);
++ }
+
+- /* TTL discard */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
+- "ip4 && ip.ttl == {0, 1}", "drop;");
++ /* Ingress Gateway Redirect Table: For NAT on a distributed
++ * router, add flows that are specific to a NAT rule. These
++ * flows indicate the presence of an applicable NAT rule that
++ * can be applied in a distributed manner.
++     * In particular REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to
++ * NAT external IP and NAT external mac so the ARP request
++ * generated in the following stage is sent out with proper IP/MAC
++ * src addresses.
++ */
++ if (distributed) {
++ ds_clear(match);
++ ds_clear(actions);
++ ds_put_format(match,
++ "ip%s.src == %s && outport == %s && "
++ "is_chassis_resident(\"%s\")",
++ is_v6 ? "6" : "4", nat->logical_ip,
++ od->l3dgw_port->json_key, nat->logical_port);
++ ds_put_format(actions, "eth.src = %s; %s = %s; next;",
++ nat->external_mac,
++ is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
++ nat->external_ip);
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
++ 100, ds_cstr(match),
++ ds_cstr(actions), &nat->header_);
++ }
+
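As a hedged illustration of the gateway-redirect preparation above, this standalone snippet prints the flow that would be requested for an assumed distributed dnat_and_snat rule; REG_SRC_IPV4 is left symbolic because it expands to a concrete register only when the action string is built:

    #include <stdio.h>

    int
    main(void)
    {
        /* Hypothetical rule: "vm1" is nat->logical_port and the MAC is
         * nat->external_mac. */
        const char *logical_ip = "10.0.0.5";
        const char *external_ip = "172.16.0.10";
        const char *external_mac = "00:00:00:00:ff:02";
        const char *l3dgw_port = "\"lr0-public\"";
        const char *logical_port = "vm1";

        printf("S_ROUTER_IN_GW_REDIRECT prio 100: "
               "ip4.src == %s && outport == %s && is_chassis_resident(\"%s\") "
               "-> eth.src = %s; REG_SRC_IPV4 = %s; next;\n",
               logical_ip, l3dgw_port, logical_port, external_mac, external_ip);
        return 0;
    }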
+- /* Pass other traffic not already handled to the next table for
+- * routing. */
+- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
+- }
+-}
++ /* Egress Loopback table: For NAT on a distributed router.
++ * If packets in the egress pipeline on the distributed
++ * gateway port have ip.dst matching a NAT external IP, then
++ * loop a clone of the packet back to the beginning of the
++ * ingress pipeline with inport = outport. */
++ if (od->l3dgw_port) {
++ /* Distributed router. */
++ ds_clear(match);
++ ds_put_format(match, "ip%s.dst == %s && outport == %s",
++ is_v6 ? "6" : "4",
++ nat->external_ip,
++ od->l3dgw_port->json_key);
++ if (!distributed) {
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ od->l3redirect_port->json_key);
++ } else {
++ ds_put_format(match, " && is_chassis_resident(\"%s\")",
++ nat->logical_port);
++ }
+
+-static void
+-build_dhcpv6_reply_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
+- struct ds *match)
+-{
+- if (op->nbrp && (!op->derived)) {
+- for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- ds_clear(match);
+- ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&"
+- " udp.dst == 546",
+- op->lrp_networks.ipv6_addrs[i].addr_s);
+- ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+- ds_cstr(match),
+- "reg0 = 0; handle_dhcpv6_reply;");
++ ds_clear(actions);
++ ds_put_format(actions,
++ "clone { ct_clear; "
++ "inport = outport; outport = \"\"; "
++ "flags = 0; flags.loopback = 1; ");
++ for (int j = 0; j < MFF_N_LOG_REGS; j++) {
++ ds_put_format(actions, "reg%d = 0; ", j);
++ }
++ ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; "
++ "next(pipeline=ingress, table=%d); };",
++ ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
++ ds_cstr(match), ds_cstr(actions),
++ &nat->header_);
++ }
+ }
+- }
+
+-}
++ /* Handle force SNAT options set in the gateway router. */
++ if (!od->l3dgw_port) {
++ if (dnat_force_snat_ip) {
++ if (od->dnat_force_snat_addrs.n_ipv4_addrs) {
++ build_lrouter_force_snat_flows(lflows, od, "4",
++ od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s,
++ "dnat");
++ }
++ if (od->dnat_force_snat_addrs.n_ipv6_addrs) {
++ build_lrouter_force_snat_flows(lflows, od, "6",
++ od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s,
++ "dnat");
++ }
++ }
++ if (lb_force_snat_ip) {
++ if (od->lb_force_snat_addrs.n_ipv4_addrs) {
++ build_lrouter_force_snat_flows(lflows, od, "4",
++ od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb");
++ }
++ if (od->lb_force_snat_addrs.n_ipv6_addrs) {
++ build_lrouter_force_snat_flows(lflows, od, "6",
++ od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
++ }
++ }
+
+-static void
+-build_ipv6_input_flows_for_lrouter_port(
+- struct ovn_port *op, struct hmap *lflows,
+- struct ds *match, struct ds *actions)
+-{
+- if (op->nbrp && (!op->derived)) {
+- /* No ingress packets are accepted on a chassisredirect
+- * port, so no need to program flows for that port. */
+- if (op->lrp_networks.n_ipv6_addrs) {
+- /* ICMPv6 echo reply. These flows reply to echo requests
+- * received for the router's IP address. */
+- ds_clear(match);
+- ds_put_cstr(match, "ip6.dst == ");
+- op_put_v6_networks(match, op);
+- ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
++ /* For gateway router, re-circulate every packet through
++ * the DNAT zone. This helps with the following.
++ *
++ * Any packet that needs to be unDNATed in the reverse
++ * direction gets unDNATed. Ideally this could be done in
++ * the egress pipeline. But since the gateway router
++ * does not have any feature that depends on the source
++     * ip address being the external IP address for IP routing,
++ * we can do it here, saving a future re-circulation. */
++ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
++ "ip", "flags.loopback = 1; ct_dnat;");
++ }
+
+- const char *lrp_actions =
+- "ip6.dst <-> ip6.src; "
+- "ip.ttl = 255; "
+- "icmp6.type = 129; "
+- "flags.loopback = 1; "
+- "next; ";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+- ds_cstr(match), lrp_actions,
+- &op->nbrp->header_);
++ /* Load balancing and packet defrag are only valid on
++     * Gateway routers or routers with a gateway port. */
++ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
++ sset_destroy(&nat_entries);
++ return;
+ }
+
+- /* ND reply. These flows reply to ND solicitations for the
+- * router's own IP address. */
+- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- ds_clear(match);
+- if (op->od->l3dgw_port && op == op->od->l3dgw_port
+- && op->od->l3redirect_port) {
+- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+- * should only be sent from the gateway chassi, so that
+- * upstream MAC learning points to the gateway chassis.
+- * Also need to avoid generation of multiple ND replies
+- * from different chassis. */
+- ds_put_format(match, "is_chassis_resident(%s)",
+- op->od->l3redirect_port->json_key);
+- }
++ /* A set to hold all ips that need defragmentation and tracking. */
++ struct sset all_ips = SSET_INITIALIZER(&all_ips);
+
+- build_lrouter_nd_flow(op->od, op, "nd_na_router",
+- op->lrp_networks.ipv6_addrs[i].addr_s,
+- op->lrp_networks.ipv6_addrs[i].sn_addr_s,
+- REG_INPORT_ETH_ADDR, match, false, 90,
+- &op->nbrp->header_, lflows);
+- }
++ for (int i = 0; i < od->nbr->n_load_balancer; i++) {
++ struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i];
++ struct ovn_northd_lb *lb =
++ ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
++ ovs_assert(lb);
+
+- /* UDP/TCP port unreachable */
+- if (!smap_get(&op->od->nbr->options, "chassis")
+- && !op->od->l3dgw_port) {
+- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- ds_clear(match);
+- ds_put_format(match,
+- "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
+- op->lrp_networks.ipv6_addrs[i].addr_s);
+- const char *action = "tcp_reset {"
+- "eth.dst <-> eth.src; "
+- "ip6.dst <-> ip6.src; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 80, ds_cstr(match), action,
+- &op->nbrp->header_);
++ for (size_t j = 0; j < lb->n_vips; j++) {
++ struct ovn_lb_vip *lb_vip = &lb->vips[j];
++ struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j];
++ ds_clear(actions);
++ build_lb_vip_actions(lb_vip, lb_vip_nb, actions,
++ lb->selection_fields, false);
+
+- ds_clear(match);
+- ds_put_format(match,
+- "ip6 && ip6.dst == %s && !ip.later_frag && udp",
+- op->lrp_networks.ipv6_addrs[i].addr_s);
+- action = "icmp6 {"
+- "eth.dst <-> eth.src; "
+- "ip6.dst <-> ip6.src; "
+- "ip.ttl = 255; "
+- "icmp6.type = 1; "
+- "icmp6.code = 4; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 80, ds_cstr(match), action,
+- &op->nbrp->header_);
++ if (!sset_contains(&all_ips, lb_vip->vip_str)) {
++ sset_add(&all_ips, lb_vip->vip_str);
++ /* If there are any load balancing rules, we should send
++ * the packet to conntrack for defragmentation and
++ * tracking. This helps with two things.
++ *
++ * 1. With tracking, we can send only new connections to
++ * pick a DNAT ip address from a group.
++ * 2. If there are L4 ports in load balancing rules, we
++ * need the defragmentation to match on L4 ports. */
++ ds_clear(match);
++ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
++ ds_put_format(match, "ip && ip4.dst == %s",
++ lb_vip->vip_str);
++ } else {
++ ds_put_format(match, "ip && ip6.dst == %s",
++ lb_vip->vip_str);
++ }
++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG,
++ 100, ds_cstr(match), "ct_next;",
++ &nb_lb->header_);
++ }
+
++ /* Higher priority rules are added for load-balancing in DNAT
++ * table. For every match (on a VIP[:port]), we add two flows
++ * via add_router_lb_flow(). One flow is for specific matching
++ * on ct.new with an action of "ct_lb($targets);". The other
++ * flow is for ct.est with an action of "ct_dnat;". */
+ ds_clear(match);
+- ds_put_format(match,
+- "ip6 && ip6.dst == %s && !ip.later_frag",
+- op->lrp_networks.ipv6_addrs[i].addr_s);
+- action = "icmp6 {"
+- "eth.dst <-> eth.src; "
+- "ip6.dst <-> ip6.src; "
+- "ip.ttl = 255; "
+- "icmp6.type = 1; "
+- "icmp6.code = 3; "
+- "next; };";
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+- 70, ds_cstr(match), action,
+- &op->nbrp->header_);
+- }
+- }
++ if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
++ ds_put_format(match, "ip && ip4.dst == %s",
++ lb_vip->vip_str);
++ } else {
++ ds_put_format(match, "ip && ip6.dst == %s",
++ lb_vip->vip_str);
++ }
+
+- /* ICMPv6 time exceeded */
+- for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+- /* skip link-local address */
+- if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+- continue;
+- }
++ int prio = 110;
++ bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp");
++ bool is_sctp = nullable_string_is_equal(nb_lb->protocol,
++ "sctp");
++ const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp";
+
+- ds_clear(match);
+- ds_clear(actions);
++ if (lb_vip->vip_port) {
++ ds_put_format(match, " && %s && %s.dst == %d", proto,
++ proto, lb_vip->vip_port);
++ prio = 120;
++ }
+
+- ds_put_format(match,
+- "inport == %s && ip6 && "
+- "ip6.src == %s/%d && "
+- "ip.ttl == {0, 1} && !ip.later_frag",
+- op->json_key,
+- op->lrp_networks.ipv6_addrs[i].network_s,
+- op->lrp_networks.ipv6_addrs[i].plen);
+- ds_put_format(actions,
+- "icmp6 {"
+- "eth.dst <-> eth.src; "
+- "ip6.dst = ip6.src; "
+- "ip6.src = %s; "
+- "ip.ttl = 255; "
+- "icmp6.type = 3; /* Time exceeded */ "
+- "icmp6.code = 0; /* TTL exceeded in transit */ "
+- "next; };",
+- op->lrp_networks.ipv6_addrs[i].addr_s);
+- ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+- ds_cstr(match), ds_cstr(actions),
+- &op->nbrp->header_);
++ if (od->l3redirect_port &&
++ (lb_vip->n_backends || !lb_vip->empty_backend_rej)) {
++ ds_put_format(match, " && is_chassis_resident(%s)",
++ od->l3redirect_port->json_key);
++ }
++ bool force_snat_for_lb =
++ lb_force_snat_ip || od->lb_force_snat_router_ip;
++ add_router_lb_flow(lflows, od, match, actions, prio,
++ force_snat_for_lb, lb_vip, proto,
++ nb_lb, meter_groups, &nat_entries);
++ }
+ }
++ sset_destroy(&all_ips);
++ sset_destroy(&nat_entries);
+ }
+-
+ }
+
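For a single hypothetical VIP, the load-balancer loop above boils down to one defrag flow plus the ct.new/ct.est pair installed through add_router_lb_flow(); the sketch below uses assumed backends and shows only the shape of the strings, not the exact output of build_lb_vip_actions():

    #include <stdio.h>

    int
    main(void)
    {
        /* Hypothetical VIP; the backends action is only a placeholder here. */
        const char *vip = "192.168.1.100";
        int vip_port = 80;
        const char *proto = "tcp";
        const char *backends = "ct_lb(backends=10.0.0.2:8080,10.0.0.3:8080);";

        printf("S_ROUTER_IN_DEFRAG prio 100: ip && ip4.dst == %s -> ct_next;\n",
               vip);
        printf("S_ROUTER_IN_DNAT   prio 120: ip && ip4.dst == %s && %s && "
               "%s.dst == %d && ct.new -> %s\n",
               vip, proto, proto, vip_port, backends);
        printf("S_ROUTER_IN_DNAT   prio 120: ip && ip4.dst == %s && %s && "
               "%s.dst == %d && ct.est -> ct_dnat;\n",
               vip, proto, proto, vip_port);
        return 0;
    }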
++
++
+ struct lswitch_flow_build_info {
+ struct hmap *datapaths;
+ struct hmap *ports;
+@@ -11177,7 +11918,8 @@ struct lswitch_flow_build_info {
+
+ static void
+ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+- struct lswitch_flow_build_info *lsi)
++ struct lswitch_flow_build_info *lsi,
++ struct hmap *bfd_connections)
+ {
+ /* Build Logical Switch Flows. */
+ build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups, lsi->lflows,
+@@ -11186,13 +11928,20 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+ build_fwd_group_lflows(od, lsi->lflows);
+ build_lswitch_lflows_admission_control(od, lsi->lflows);
+ build_lswitch_input_port_sec_od(od, lsi->lflows);
++ build_lswitch_learn_fdb_od(od, lsi->lflows);
++ build_lswitch_arp_nd_responder_default(od, lsi->lflows);
++ build_lswitch_dns_lookup_and_response(od, lsi->lflows);
++ build_lswitch_dhcp_and_dns_defaults(od, lsi->lflows);
++ build_lswitch_destination_lookup_bmcast(od, lsi->lflows, &lsi->actions);
++ build_lswitch_output_port_sec_od(od, lsi->lflows);
+
+ /* Build Logical Router Flows. */
+ build_adm_ctrl_flows_for_lrouter(od, lsi->lflows);
+ build_neigh_learning_flows_for_lrouter(od, lsi->lflows, &lsi->match,
+ &lsi->actions);
+ build_ND_RA_flows_for_lrouter(od, lsi->lflows);
+- build_static_route_flows_for_lrouter(od, lsi->lflows, lsi->ports);
++ build_static_route_flows_for_lrouter(od, lsi->lflows, lsi->ports,
++ bfd_connections);
+ build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
+ &lsi->actions);
+ build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->ports);
+@@ -11204,6 +11953,9 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
+ build_arp_request_flows_for_lrouter(od, lsi->lflows, &lsi->match,
+ &lsi->actions);
+ build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows);
++ build_lrouter_arp_nd_for_datapath(od, lsi->lflows);
++ build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->meter_groups,
++ lsi->lbs, &lsi->match, &lsi->actions);
+ }
+
+ /* Helper function to combine all lflow generation which is iterated by port.
+@@ -11216,6 +11968,20 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op,
+ /* Build Logical Switch Flows. */
+ build_lswitch_input_port_sec_op(op, lsi->lflows, &lsi->actions,
+ &lsi->match);
++ build_lswitch_learn_fdb_op(op, lsi->lflows, &lsi->actions,
++ &lsi->match);
++ build_lswitch_arp_nd_responder_skip_local(op, lsi->lflows,
++ &lsi->match);
++ build_lswitch_arp_nd_responder_known_ips(op, lsi->lflows,
++ lsi->ports,
++ &lsi->actions,
++ &lsi->match);
++ build_lswitch_dhcp_options_and_response(op,lsi->lflows);
++ build_lswitch_external_port(op, lsi->lflows);
++ build_lswitch_ip_unicast_lookup(op, lsi->lflows, lsi->mcgroups,
++ &lsi->actions, &lsi->match);
++ build_lswitch_output_port_sec_op(op, lsi->lflows,
++ &lsi->actions, &lsi->match);
+
+ /* Build Logical Router Flows. */
+ build_adm_ctrl_flows_for_lrouter_port(op, lsi->lflows, &lsi->match,
+@@ -11232,6 +11998,10 @@ build_lswitch_and_lrouter_iterate_by_op(struct ovn_port *op,
+ build_dhcpv6_reply_flows_for_lrouter_port(op, lsi->lflows, &lsi->match);
+ build_ipv6_input_flows_for_lrouter_port(op, lsi->lflows,
+ &lsi->match, &lsi->actions);
++ build_lrouter_ipv4_ip_input(op, lsi->lflows,
++ &lsi->match, &lsi->actions);
++ build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
++ &lsi->actions);
+ }
+
+ static void
+@@ -11239,10 +12009,13 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+ struct hmap *port_groups, struct hmap *lflows,
+ struct hmap *mcgroups,
+ struct hmap *igmp_groups,
+- struct shash *meter_groups, struct hmap *lbs)
++ struct shash *meter_groups, struct hmap *lbs,
++ struct hmap *bfd_connections)
+ {
+ struct ovn_datapath *od;
+ struct ovn_port *op;
++ struct ovn_northd_lb *lb;
++ struct ovn_igmp_group *igmp_group;
+
+ char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac);
+
+@@ -11264,22 +12037,28 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+ * will move here and will be reogranized by iterator type.
+ */
+ HMAP_FOR_EACH (od, key_node, datapaths) {
+- build_lswitch_and_lrouter_iterate_by_od(od, &lsi);
++ build_lswitch_and_lrouter_iterate_by_od(od, &lsi, bfd_connections);
+ }
+ HMAP_FOR_EACH (op, key_node, ports) {
+ build_lswitch_and_lrouter_iterate_by_op(op, &lsi);
+ }
++ HMAP_FOR_EACH (lb, hmap_node, lbs) {
++ build_lswitch_arp_nd_service_monitor(lb, lsi.lflows,
++ &lsi.actions,
++ &lsi.match);
++ }
++ HMAP_FOR_EACH (igmp_group, hmap_node, igmp_groups) {
++ build_lswitch_ip_mcast_igmp_mld(igmp_group,
++ lsi.lflows,
++ &lsi.actions,
++ &lsi.match);
++ }
+ free(svc_check_match);
+
+ ds_destroy(&lsi.match);
+ ds_destroy(&lsi.actions);
+
+- /* Legacy lswitch build - to be migrated. */
+- build_lswitch_flows(datapaths, ports, lflows, mcgroups,
+- igmp_groups, lbs);
+-
+- /* Legacy lrouter build - to be migrated. */
+- build_lrouter_flows(datapaths, ports, lflows, meter_groups, lbs);
++ build_lswitch_flows(datapaths, lflows);
+ }
+
+ struct ovn_dp_group {
+@@ -11356,13 +12135,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
+ struct hmap *ports, struct hmap *port_groups,
+ struct hmap *mcgroups, struct hmap *igmp_groups,
+ struct shash *meter_groups,
+- struct hmap *lbs)
++ struct hmap *lbs, struct hmap *bfd_connections)
+ {
+ struct hmap lflows = HMAP_INITIALIZER(&lflows);
+
+ build_lswitch_and_lrouter_flows(datapaths, ports,
+ port_groups, &lflows, mcgroups,
+- igmp_groups, meter_groups, lbs);
++ igmp_groups, meter_groups, lbs,
++ bfd_connections);
+
+ /* Collecting all unique datapath groups. */
+ struct hmap dp_groups = HMAP_INITIALIZER(&dp_groups);
+@@ -11801,17 +12581,20 @@ static void
+ sync_meters_iterate_nb_meter(struct northd_context *ctx,
+ const char *meter_name,
+ const struct nbrec_meter *nb_meter,
+- struct shash *sb_meters)
++ struct shash *sb_meters,
++ struct sset *used_sb_meters)
+ {
++ const struct sbrec_meter *sb_meter;
+ bool new_sb_meter = false;
+
+- const struct sbrec_meter *sb_meter = shash_find_and_delete(sb_meters,
+- meter_name);
++ sb_meter = shash_find_data(sb_meters, meter_name);
+ if (!sb_meter) {
+ sb_meter = sbrec_meter_insert(ctx->ovnsb_txn);
+ sbrec_meter_set_name(sb_meter, meter_name);
++ shash_add(sb_meters, sb_meter->name, sb_meter);
+ new_sb_meter = true;
+ }
++ sset_add(used_sb_meters, meter_name);
+
+ if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) {
+ struct sbrec_meter_band **sb_bands;
+@@ -11833,6 +12616,24 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx,
+ sbrec_meter_set_unit(sb_meter, nb_meter->unit);
+ }
+
++static void
++sync_acl_fair_meter(struct northd_context *ctx, struct shash *meter_groups,
++ const struct nbrec_acl *acl, struct shash *sb_meters,
++ struct sset *used_sb_meters)
++{
++ const struct nbrec_meter *nb_meter =
++ fair_meter_lookup_by_name(meter_groups, acl->meter);
++
++ if (!nb_meter) {
++ return;
++ }
++
++ char *meter_name = alloc_acl_log_unique_meter_name(acl);
++ sync_meters_iterate_nb_meter(ctx, meter_name, nb_meter, sb_meters,
++ used_sb_meters);
++ free(meter_name);
++}
++
+ /* Each entry in the Meter and Meter_Band tables in OVN_Northbound have
+ * a corresponding entries in the Meter and Meter_Band tables in
+ * OVN_Southbound. Additionally, ACL logs that use fair meters have
+@@ -11840,9 +12641,10 @@ sync_meters_iterate_nb_meter(struct northd_context *ctx,
+ */
+ static void
+ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
+- struct shash *meter_groups)
++ struct shash *meter_groups, struct hmap *port_groups)
+ {
+ struct shash sb_meters = SHASH_INITIALIZER(&sb_meters);
++ struct sset used_sb_meters = SSET_INITIALIZER(&used_sb_meters);
+
+ const struct sbrec_meter *sb_meter;
+ SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) {
+@@ -11852,7 +12654,7 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
+ const struct nbrec_meter *nb_meter;
+ NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) {
+ sync_meters_iterate_nb_meter(ctx, nb_meter->name, nb_meter,
+- &sb_meters);
++ &sb_meters, &used_sb_meters);
+ }
+
+ /*
+@@ -11866,19 +12668,28 @@ sync_meters(struct northd_context *ctx, struct hmap *datapaths,
+ continue;
+ }
+ for (size_t i = 0; i < od->nbs->n_acls; i++) {
+- struct nbrec_acl *acl = od->nbs->acls[i];
+- nb_meter = fair_meter_lookup_by_name(meter_groups, acl->meter);
+- if (!nb_meter) {
+- continue;
++ sync_acl_fair_meter(ctx, meter_groups, od->nbs->acls[i],
++ &sb_meters, &used_sb_meters);
++ }
++ struct ovn_port_group *pg;
++ HMAP_FOR_EACH (pg, key_node, port_groups) {
++ if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
++ for (size_t i = 0; i < pg->nb_pg->n_acls; i++) {
++ sync_acl_fair_meter(ctx, meter_groups, pg->nb_pg->acls[i],
++ &sb_meters, &used_sb_meters);
++ }
+ }
+-
+- char *meter_name = alloc_acl_log_unique_meter_name(acl);
+- sync_meters_iterate_nb_meter(ctx, meter_name, nb_meter,
+- &sb_meters);
+- free(meter_name);
+ }
+ }
+
++ const char *used_meter;
++ const char *used_meter_next;
++ SSET_FOR_EACH_SAFE (used_meter, used_meter_next, &used_sb_meters) {
++ shash_find_and_delete(&sb_meters, used_meter);
++ sset_delete(&used_sb_meters, SSET_NODE_FROM_NAME(used_meter));
++ }
++ sset_destroy(&used_sb_meters);
++
+ struct shash_node *node, *next;
+ SHASH_FOR_EACH_SAFE (node, next, &sb_meters) {
+ sbrec_meter_delete(node->data);
+@@ -12274,6 +13085,7 @@ ovnnb_db_run(struct northd_context *ctx,
+ struct hmap igmp_groups;
+ struct shash meter_groups = SHASH_INITIALIZER(&meter_groups);
+ struct hmap lbs;
++ struct hmap bfd_connections = HMAP_INITIALIZER(&bfd_connections);
+
+ /* Sync ipsec configuration.
+ * Copy nb_cfg from northbound to southbound database.
+@@ -12354,6 +13166,7 @@ ovnnb_db_run(struct northd_context *ctx,
+
+ use_logical_dp_groups = smap_get_bool(&nb->options,
+ "use_logical_dp_groups", false);
++ /* deprecated, use --event instead */
+ controller_event_en = smap_get_bool(&nb->options,
+ "controller_event", false);
+ check_lsp_is_up = !smap_get_bool(&nb->options,
+@@ -12368,14 +13181,16 @@ ovnnb_db_run(struct northd_context *ctx,
+ build_ip_mcast(ctx, datapaths);
+ build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
+ build_meter_groups(ctx, &meter_groups);
++ build_bfd_table(ctx, &bfd_connections, ports);
+ build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
+- &igmp_groups, &meter_groups, &lbs);
++ &igmp_groups, &meter_groups, &lbs, &bfd_connections);
+ ovn_update_ipv6_prefix(ports);
+
+ sync_address_sets(ctx);
+ sync_port_groups(ctx, &port_groups);
+- sync_meters(ctx, datapaths, &meter_groups);
++ sync_meters(ctx, datapaths, &meter_groups, &port_groups);
+ sync_dns_entries(ctx, datapaths);
++ cleanup_stale_fdp_entries(ctx, datapaths);
+
+ struct ovn_northd_lb *lb;
+ HMAP_FOR_EACH_POP (lb, hmap_node, &lbs) {
+@@ -12393,9 +13208,13 @@ ovnnb_db_run(struct northd_context *ctx,
+ HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
+ ovn_port_group_destroy(&port_groups, pg);
+ }
++
++ bfd_cleanup_connections(ctx, &bfd_connections);
++
+ hmap_destroy(&igmp_groups);
+ hmap_destroy(&mcast_groups);
+ hmap_destroy(&port_groups);
++ hmap_destroy(&bfd_connections);
+
+ struct shash_node *node, *next;
+ SHASH_FOR_EACH_SAFE (node, next, &meter_groups) {
+@@ -12542,7 +13361,17 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports,
+ continue;
+ }
+
+- bool up = (sb->chassis || lsp_is_router(op->nbsp));
++ bool up = false;
++
++ if (lsp_is_router(op->nbsp)) {
++ up = true;
++ } else if (sb->chassis) {
++ up = smap_get_bool(&sb->chassis->other_config,
++ OVN_FEATURE_PORT_UP_NOTIF, false)
++ ? sb->n_up && sb->up[0]
++ : true;
++ }
++
+ if (!op->nbsp->up || *op->nbsp->up != up) {
+ nbrec_logical_switch_port_set_up(op->nbsp, &up, 1);
+ }
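The port-up computation above can be restated as a small standalone helper; the function below is an assumed illustration of the same decision, not code from the patch:

    #include <stdbool.h>
    #include <stdio.h>

    /* 'up' is unconditionally true for router ports; for other ports it
     * tracks the chassis-reported Port_Binding.up only when the chassis
     * advertises OVN_FEATURE_PORT_UP_NOTIF, otherwise being bound to a
     * chassis is enough. */
    static bool
    lsp_up(bool is_router, bool has_chassis, bool port_up_notif, bool sb_up)
    {
        if (is_router) {
            return true;
        }
        if (!has_chassis) {
            return false;
        }
        return port_up_notif ? sb_up : true;
    }

    int
    main(void)
    {
        printf("bound, no notif support: %d\n", lsp_up(false, true, false, false));
        printf("bound, notif, flows pending: %d\n", lsp_up(false, true, true, false));
        printf("bound, notif, flows installed: %d\n", lsp_up(false, true, true, true));
        return 0;
    }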
+@@ -12690,7 +13519,7 @@ static const char *rbac_encap_update[] =
+ static const char *rbac_port_binding_auth[] =
+ {""};
+ static const char *rbac_port_binding_update[] =
+- {"chassis"};
++ {"chassis", "up"};
+
+ static const char *rbac_mac_binding_auth[] =
+ {""};
+@@ -13176,6 +14005,8 @@ main(int argc, char *argv[])
+ &sbrec_port_binding_col_ha_chassis_group);
+ ovsdb_idl_add_column(ovnsb_idl_loop.idl,
+ &sbrec_port_binding_col_virtual_parent);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl,
++ &sbrec_port_binding_col_up);
+ ovsdb_idl_add_column(ovnsb_idl_loop.idl,
+ &sbrec_gateway_chassis_col_chassis);
+ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
+@@ -13324,9 +14155,25 @@ main(int argc, char *argv[])
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_name);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_vips);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_protocol);
++ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_load_balancer_col_options);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_load_balancer_col_external_ids);
+
++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_bfd);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_min_tx);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_min_rx);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_detect_mult);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_disc);
++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_src_port);
++
++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_fdb);
++ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_fdb_col_mac);
++ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_fdb_col_dp_key);
++ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_fdb_col_port_key);
++
+ struct ovsdb_idl_index *sbrec_chassis_by_name
+ = chassis_index_create(ovnsb_idl_loop.idl);
+
+@@ -13449,6 +14296,7 @@ main(int argc, char *argv[])
+ }
+ }
+
++
+ free(ovn_internal_version);
+ unixctl_server_destroy(unixctl);
+ ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
+diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
+index 269e3a888..29019809c 100644
+--- a/ovn-nb.ovsschema
++++ b/ovn-nb.ovsschema
+@@ -1,7 +1,7 @@
+ {
+ "name": "OVN_Northbound",
+- "version": "5.28.0",
+- "cksum": "610359755 26847",
++ "version": "5.31.0",
++ "cksum": "2352750632 28701",
+ "tables": {
+ "NB_Global": {
+ "columns": {
+@@ -188,6 +188,11 @@
+ ["eth_src", "eth_dst", "ip_src", "ip_dst",
+ "tp_src", "tp_dst"]]},
+ "min": 0, "max": "unlimited"}},
++ "options": {
++ "type": {"key": "string",
++ "value": "string",
++ "min": 0,
++ "max": "unlimited"}},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}},
+@@ -369,6 +374,10 @@
+ "min": 0, "max": 1}},
+ "nexthop": {"type": "string"},
+ "output_port": {"type": {"key": "string", "min": 0, "max": 1}},
++ "bfd": {"type": {"key": {"type": "uuid", "refTable": "BFD",
++ "refType": "weak"},
++ "min": 0,
++ "max": 1}},
+ "options": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
+@@ -386,6 +395,8 @@
+ "key": {"type": "string",
+ "enum": ["set", ["allow", "drop", "reroute"]]}}},
+ "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
++ "nexthops": {"type": {
++ "key": "string", "min": 0, "max": "unlimited"}},
+ "options": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
+@@ -519,5 +530,30 @@
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}},
+ "indexes": [["name"]],
++ "isRoot": true},
++ "BFD": {
++ "columns": {
++ "logical_port": {"type": "string"},
++ "dst_ip": {"type": "string"},
++ "min_tx": {"type": {"key": {"type": "integer",
++ "minInteger": 1},
++ "min": 0, "max": 1}},
++ "min_rx": {"type": {"key": {"type": "integer"},
++ "min": 0, "max": 1}},
++ "detect_mult": {"type": {"key": {"type": "integer",
++ "minInteger": 1},
++ "min": 0, "max": 1}},
++ "status": {
++ "type": {"key": {"type": "string",
++ "enum": ["set", ["down", "init", "up",
++ "admin_down"]]},
++ "min": 0, "max": 1}},
++ "external_ids": {
++ "type": {"key": "string", "value": "string",
++ "min": 0, "max": "unlimited"}},
++ "options": {
++ "type": {"key": "string", "value": "string",
++ "min": 0, "max": "unlimited"}}},
++ "indexes": [["logical_port", "dst_ip"]],
+ "isRoot": true}}
+ }
+diff --git a/ovn-nb.xml b/ovn-nb.xml
+index c9ab25ceb..09b755f1a 100644
+--- a/ovn-nb.xml
++++ b/ovn-nb.xml
+@@ -1635,6 +1635,24 @@
+ See External IDs at the beginning of this document.
+
+
++ --reject
option and
++ it has no active backends, a TCP reset segment (for tcp) or an ICMP
++ port unreachable packet (for all other kind of traffic) will be sent
++ whenever an incoming packet is received for this load-balancer.
++ Please note using --reject
option will disable empty_lb
++ SB controller event for this load balancer.
++
ovn-controller
(8) for more information.
+
+
++ ovn-controller
populates this key with true
++ when it supports Port_Binding.up
.
++ Common
+ Columns
at the beginning of this document.
+@@ -1521,6 +1526,68 @@
+
+
+
++ P = get_fdb(A);
++ Parameters: 48-bit MAC address field A.
++ ++
++ Looks up A in fdb table. If an entry is found, stores
++ the logical port key to the out parameter P
.
++
Example: outport = get_fdb(eth.src);
put_fdb(P, A);
++ ++ Parameters: logical port string field P, 48-bit ++ MAC address field A. ++
++ ++++ Adds or updates the entry for Ethernet address A in ++ fdb table, setting its logical port key to P. ++
++ ++Example: put_fdb(inport, arp.spa);
R = lookup_fdb(P, A);
++ ++ Parameters: 48-bit MAC address field M, ++ logical port string field P. ++
++ ++++ Result: stored to a 1-bit subfield R. ++
++ ++
++ Looks up A in fdb table. If an entry is found
++ and the logical port key is P,
,
++ stores 1
in the 1-bit subfield
++ R, else 0.
++
++ Example:
++
++ reg0[0] = lookup_fdb(inport, eth.src);
++
++
nd_ns { action;
... };
+@@ -2771,6 +2838,14 @@ tcp.flags = RST; +
+ + ++
++ This is set to true
whenever all OVS flows
++ required by this Port_Binding have been installed. This is
++ populated by ovn-controller
.
++
+ A number that represents the logical port in the key (e.g. STT key or +@@ -4225,10 +4300,126 @@ tcp.flags = RST; + Datapaths to which this load balancer applies to. +
ovn-northd
.
++ true
by
++ ovn-northd
when original destination IP and transport port
++ of the load balanced packets are stored in registers
++ reg1, reg2, xxreg1
.
++ --may-exist
] [--policy
=POLICY]
+ [--ecmp
] [--ecmp-symmetric-reply
]
++ [--bfd[=UUID
]]
+ lr-route-add
router
+ prefix nexthop [port]
++ --bfd
option is used to link a BFD session to the
++ OVN route. If the BFD session UUID is provided, it will be used
++ for the OVN route otherwise the next-hop will be used to perform
++ a lookup in the OVN BFD table.
++ If the lookup fails and port is specified, a new entry
++ in the BFD table will be created using the nexthop as
++ dst_ip and port as logical_port.
++
+ It is an error if a route with prefix and
+ POLICY already exists, unless --may-exist
,
+@@ -739,7 +750,7 @@
+
--may-exist
]lr-policy-add
+ router priority match
+- action [nexthop]
++ action [nexthop[,nexthop,...]]
+ [options key=value]]
+@@ -748,10 +759,12 @@
+ are similar to OVN ACLs, but exist on the logical-router. Reroute
+ policies are needed for service-insertion and service-chaining.
+ nexthop is an optional parameter. It needs to be provided
+- only when action is reroute. A policy is
+- uniquely identified by priority and match.
+- Multiple policies can have the same priority.
+- options sets the router policy options as key-value pair.
++ only when action is reroute. Multiple
++ nexthops
can be specified for ECMP routing.
++ A policy is uniquely identified by priority and
++ match. Multiple policies can have the same
++ priority. options sets the router policy
++ options as key-value pair.
+ The supported option is : pkt_mark
.
+
--may-exist
| --add-duplicate
] lb-add
lb vip ips [protocol]--may-exist
| --add-duplicate
| --reject
| --event
] lb-add
lb vip ips [protocol]+ Creates a new load balancer named lb with the provided +@@ -936,6 +949,23 @@ + creates a new load balancer with a duplicate name. +
+ ++
++ If the load balancer is created with --reject
option and
++ it has no active backends, a TCP reset segment (for tcp) or an ICMP
++ port unreachable packet (for all other kind of traffic) will be sent
++ whenever an incoming packet is received for this load-balancer.
++ Please note using --reject
option will disable
++ empty_lb SB controller event for this load balancer.
++
++ If the load balancer is created with --event
option and
++ it has no active backends, whenever the lb receives traffic, the event
++ is reported in the Controller_Event table in the SB db.
++ Please note --event
option can't be specified with
++ --reject
one.
++
+ The following example adds a load balancer. +
+diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index d19e1b6c6..dc0c50854 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -125,6 +125,65 @@ static char * OVS_WARN_UNUSED_RESULT main_loop(const char *args, + const struct timer *); + static void server_loop(struct ovsdb_idl *idl, int argc, char *argv[]); + ++/* A context for keeping track of which switch/router certain ports are ++ * connected to. ++ * ++ * It is required to track changes that we did within current set of commands ++ * because partial updates of sets in database are not reflected in the idl ++ * until transaction is committed and updates received from the server. */ ++struct nbctl_context { ++ struct ctl_context base; ++ struct shash lsp_to_ls_map; ++ struct shash lrp_to_lr_map; ++ bool context_valid; ++}; ++ ++static void ++nbctl_context_init(struct nbctl_context *nbctx) ++{ ++ nbctx->context_valid = false; ++ shash_init(&nbctx->lsp_to_ls_map); ++ shash_init(&nbctx->lrp_to_lr_map); ++} ++ ++static void ++nbctl_context_destroy(struct nbctl_context *nbctx) ++{ ++ nbctx->context_valid = false; ++ shash_destroy(&nbctx->lsp_to_ls_map); ++ shash_destroy(&nbctx->lrp_to_lr_map); ++} ++ ++/* Casts 'base' into 'struct nbctl_context' and initializes it if needed. */ ++static struct nbctl_context * ++nbctl_context_get(struct ctl_context *base) ++{ ++ struct nbctl_context *nbctx; ++ ++ nbctx = CONTAINER_OF(base, struct nbctl_context, base); ++ ++ if (nbctx->context_valid) { ++ return nbctx; ++ } ++ ++ const struct nbrec_logical_switch *ls; ++ NBREC_LOGICAL_SWITCH_FOR_EACH (ls, base->idl) { ++ for (size_t i = 0; i < ls->n_ports; i++) { ++ shash_add_once(&nbctx->lsp_to_ls_map, ls->ports[i]->name, ls); ++ } ++ } ++ ++ const struct nbrec_logical_router *lr; ++ NBREC_LOGICAL_ROUTER_FOR_EACH (lr, base->idl) { ++ for (size_t i = 0; i < lr->n_ports; i++) { ++ shash_add_once(&nbctx->lrp_to_lr_map, lr->ports[i]->name, lr); ++ } ++ } ++ ++ nbctx->context_valid = true; ++ return nbctx; ++} ++ + int + main(int argc, char *argv[]) + { +@@ -707,7 +766,7 @@ Route commands:\n\ + lr-route-list ROUTER print routes for ROUTER\n\ + \n\ + Policy commands:\n\ +- lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \ ++ lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP,[NEXTHOP,...]] \ + [OPTIONS KEY=VALUE ...] \n\ + add a policy to router\n\ + lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\ +@@ -1249,6 +1308,7 @@ static void + nbctl_ls_del(struct ctl_context *ctx) + { + bool must_exist = !shash_find(&ctx->options, "--if-exists"); ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const char *id = ctx->argv[1]; + const struct nbrec_logical_switch *ls = NULL; + +@@ -1261,6 +1321,11 @@ nbctl_ls_del(struct ctl_context *ctx) + return; + } + ++ /* Updating runtime cache. */ ++ for (size_t i = 0; i < ls->n_ports; i++) { ++ shash_find_and_delete(&nbctx->lsp_to_ls_map, ls->ports[i]->name); ++ } ++ + nbrec_logical_switch_delete(ls); + } + +@@ -1317,22 +1382,19 @@ lsp_by_name_or_uuid(struct ctl_context *ctx, const char *id, + + /* Returns the logical switch that contains 'lsp'. 
*/ + static char * OVS_WARN_UNUSED_RESULT +-lsp_to_ls(const struct ovsdb_idl *idl, ++lsp_to_ls(struct ctl_context *ctx, + const struct nbrec_logical_switch_port *lsp, + const struct nbrec_logical_switch **ls_p) + { ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const struct nbrec_logical_switch *ls; + *ls_p = NULL; + +- NBREC_LOGICAL_SWITCH_FOR_EACH (ls, idl) { +- for (size_t i = 0; i < ls->n_ports; i++) { +- if (ls->ports[i] == lsp) { +- *ls_p = ls; +- return NULL; +- } +- } ++ ls = shash_find_data(&nbctx->lsp_to_ls_map, lsp->name); ++ if (ls) { ++ *ls_p = ls; ++ return NULL; + } +- + /* Can't happen because of the database schema */ + return xasprintf("logical port %s is not part of any logical switch", + lsp->name); +@@ -1353,6 +1415,7 @@ static void + nbctl_lsp_add(struct ctl_context *ctx) + { + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + + const struct nbrec_logical_switch *ls = NULL; + char *error = ls_by_name_or_uuid(ctx, ctx->argv[1], true, &ls); +@@ -1395,7 +1458,7 @@ nbctl_lsp_add(struct ctl_context *ctx) + } + + const struct nbrec_logical_switch *lsw; +- error = lsp_to_ls(ctx->idl, lsp, &lsw); ++ error = lsp_to_ls(ctx, lsp, &lsw); + if (error) { + ctx->error = error; + return; +@@ -1448,31 +1511,27 @@ nbctl_lsp_add(struct ctl_context *ctx) + } + + /* Insert the logical port into the logical switch. */ +- nbrec_logical_switch_verify_ports(ls); +- struct nbrec_logical_switch_port **new_ports = xmalloc(sizeof *new_ports * +- (ls->n_ports + 1)); +- nullable_memcpy(new_ports, ls->ports, sizeof *new_ports * ls->n_ports); +- new_ports[ls->n_ports] = CONST_CAST(struct nbrec_logical_switch_port *, +- lsp); +- nbrec_logical_switch_set_ports(ls, new_ports, ls->n_ports + 1); +- free(new_ports); ++ nbrec_logical_switch_update_ports_addvalue(ls, lsp); ++ ++ /* Updating runtime cache. */ ++ shash_add(&nbctx->lsp_to_ls_map, lsp_name, ls); + } + +-/* Removes logical switch port 'ls->ports[idx]'. */ ++/* Removes logical switch port 'lsp' from the logical switch 'ls'. */ + static void +-remove_lsp(const struct nbrec_logical_switch *ls, size_t idx) ++remove_lsp(struct ctl_context *ctx, ++ const struct nbrec_logical_switch *ls, ++ const struct nbrec_logical_switch_port *lsp) + { +- const struct nbrec_logical_switch_port *lsp = ls->ports[idx]; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); ++ ++ /* Updating runtime cache. */ ++ shash_find_and_delete(&nbctx->lsp_to_ls_map, lsp->name); + + /* First remove 'lsp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is + * sent to the database server (due to garbage collection). */ +- struct nbrec_logical_switch_port **new_ports +- = xmemdup(ls->ports, sizeof *new_ports * ls->n_ports); +- new_ports[idx] = new_ports[ls->n_ports - 1]; +- nbrec_logical_switch_verify_ports(ls); +- nbrec_logical_switch_set_ports(ls, new_ports, ls->n_ports - 1); +- free(new_ports); ++ nbrec_logical_switch_update_ports_delvalue(ls, lsp); + + /* Delete 'lsp' from the IDL. This won't have a real effect on the + * database server (the IDL will suppress it in fact) but it means that it +@@ -1498,18 +1557,13 @@ nbctl_lsp_del(struct ctl_context *ctx) + + /* Find the switch that contains 'lsp', then delete it. 
*/ + const struct nbrec_logical_switch *ls; +- NBREC_LOGICAL_SWITCH_FOR_EACH (ls, ctx->idl) { +- for (size_t i = 0; i < ls->n_ports; i++) { +- if (ls->ports[i] == lsp) { +- remove_lsp(ls, i); +- return; +- } +- } +- } + +- /* Can't happen because of the database schema. */ +- ctl_error(ctx, "logical port %s is not part of any logical switch", +- ctx->argv[1]); ++ error = lsp_to_ls(ctx, lsp, &ls); ++ if (error) { ++ ctx->error = error; ++ return; ++ } ++ remove_lsp(ctx, ls, lsp); + } + + static void +@@ -1658,7 +1712,7 @@ nbctl_lsp_set_addresses(struct ctl_context *ctx) + } + + const struct nbrec_logical_switch *ls; +- error = lsp_to_ls(ctx->idl, lsp, &ls); ++ error = lsp_to_ls(ctx, lsp, &ls); + if (error) { + ctx->error = error; + return; +@@ -2299,17 +2353,11 @@ nbctl_acl_add(struct ctl_context *ctx) + } + + /* Insert the acl into the logical switch/port group. */ +- struct nbrec_acl **new_acls = xmalloc(sizeof *new_acls * (n_acls + 1)); +- nullable_memcpy(new_acls, acls, sizeof *new_acls * n_acls); +- new_acls[n_acls] = acl; + if (pg) { +- nbrec_port_group_verify_acls(pg); +- nbrec_port_group_set_acls(pg, new_acls, n_acls + 1); ++ nbrec_port_group_update_acls_addvalue(pg, acl); + } else { +- nbrec_logical_switch_verify_acls(ls); +- nbrec_logical_switch_set_acls(ls, new_acls, n_acls + 1); ++ nbrec_logical_switch_update_acls_addvalue(ls, acl); + } +- free(new_acls); + } + + static void +@@ -2349,23 +2397,15 @@ nbctl_acl_del(struct ctl_context *ctx) + /* If priority and match are not specified, delete all ACLs with the + * specified direction. */ + if (ctx->argc == 3) { +- struct nbrec_acl **new_acls = xmalloc(sizeof *new_acls * n_acls); +- +- int n_new_acls = 0; + for (size_t i = 0; i < n_acls; i++) { +- if (strcmp(direction, acls[i]->direction)) { +- new_acls[n_new_acls++] = acls[i]; ++ if (!strcmp(direction, acls[i]->direction)) { ++ if (pg) { ++ nbrec_port_group_update_acls_delvalue(pg, acls[i]); ++ } else { ++ nbrec_logical_switch_update_acls_delvalue(ls, acls[i]); ++ } + } + } +- +- if (pg) { +- nbrec_port_group_verify_acls(pg); +- nbrec_port_group_set_acls(pg, new_acls, n_new_acls); +- } else { +- nbrec_logical_switch_verify_acls(ls); +- nbrec_logical_switch_set_acls(ls, new_acls, n_new_acls); +- } +- free(new_acls); + return; + } + +@@ -2387,19 +2427,11 @@ nbctl_acl_del(struct ctl_context *ctx) + + if (priority == acl->priority && !strcmp(ctx->argv[4], acl->match) && + !strcmp(direction, acl->direction)) { +- struct nbrec_acl **new_acls +- = xmemdup(acls, sizeof *new_acls * n_acls); +- new_acls[i] = acls[n_acls - 1]; + if (pg) { +- nbrec_port_group_verify_acls(pg); +- nbrec_port_group_set_acls(pg, new_acls, +- n_acls - 1); ++ nbrec_port_group_update_acls_delvalue(pg, acl); + } else { +- nbrec_logical_switch_verify_acls(ls); +- nbrec_logical_switch_set_acls(ls, new_acls, +- n_acls - 1); ++ nbrec_logical_switch_update_acls_delvalue(ls, acl); + } +- free(new_acls); + return; + } + } +@@ -2552,15 +2584,7 @@ nbctl_qos_add(struct ctl_context *ctx) + } + + /* Insert the qos rule the logical switch. 
*/ +- nbrec_logical_switch_verify_qos_rules(ls); +- struct nbrec_qos **new_qos_rules +- = xmalloc(sizeof *new_qos_rules * (ls->n_qos_rules + 1)); +- nullable_memcpy(new_qos_rules, +- ls->qos_rules, sizeof *new_qos_rules * ls->n_qos_rules); +- new_qos_rules[ls->n_qos_rules] = qos; +- nbrec_logical_switch_set_qos_rules(ls, new_qos_rules, +- ls->n_qos_rules + 1); +- free(new_qos_rules); ++ nbrec_logical_switch_update_qos_rules_addvalue(ls, qos); + } + + static void +@@ -2597,34 +2621,31 @@ nbctl_qos_del(struct ctl_context *ctx) + /* If uuid was specified, delete qos_rule with the + * specified uuid. */ + if (ctx->argc == 3) { +- struct nbrec_qos **new_qos_rules +- = xmalloc(sizeof *new_qos_rules * ls->n_qos_rules); ++ size_t i; + +- int n_qos_rules = 0; + if (qos_rule_uuid) { +- for (size_t i = 0; i < ls->n_qos_rules; i++) { +- if (!uuid_equals(qos_rule_uuid, +- &(ls->qos_rules[i]->header_.uuid))) { +- new_qos_rules[n_qos_rules++] = ls->qos_rules[i]; ++ for (i = 0; i < ls->n_qos_rules; i++) { ++ if (uuid_equals(qos_rule_uuid, ++ &(ls->qos_rules[i]->header_.uuid))) { ++ nbrec_logical_switch_update_qos_rules_delvalue( ++ ls, ls->qos_rules[i]); ++ break; + } + } +- if (n_qos_rules == ls->n_qos_rules) { ++ if (i == ls->n_qos_rules) { + ctl_error(ctx, "uuid is not found"); + } + + /* If priority and match are not specified, delete all qos_rules + * with the specified direction. */ + } else { +- for (size_t i = 0; i < ls->n_qos_rules; i++) { +- if (strcmp(direction, ls->qos_rules[i]->direction)) { +- new_qos_rules[n_qos_rules++] = ls->qos_rules[i]; ++ for (i = 0; i < ls->n_qos_rules; i++) { ++ if (!strcmp(direction, ls->qos_rules[i]->direction)) { ++ nbrec_logical_switch_update_qos_rules_delvalue( ++ ls, ls->qos_rules[i]); + } + } + } +- +- nbrec_logical_switch_verify_qos_rules(ls); +- nbrec_logical_switch_set_qos_rules(ls, new_qos_rules, n_qos_rules); +- free(new_qos_rules); + return; + } + +@@ -2651,14 +2672,7 @@ nbctl_qos_del(struct ctl_context *ctx) + + if (priority == qos->priority && !strcmp(ctx->argv[4], qos->match) && + !strcmp(direction, qos->direction)) { +- struct nbrec_qos **new_qos_rules +- = xmemdup(ls->qos_rules, +- sizeof *new_qos_rules * ls->n_qos_rules); +- new_qos_rules[i] = ls->qos_rules[ls->n_qos_rules - 1]; +- nbrec_logical_switch_verify_qos_rules(ls); +- nbrec_logical_switch_set_qos_rules(ls, new_qos_rules, +- ls->n_qos_rules - 1); +- free(new_qos_rules); ++ nbrec_logical_switch_update_qos_rules_delvalue(ls, qos); + return; + } + } +@@ -2821,6 +2835,14 @@ nbctl_lb_add(struct ctl_context *ctx) + + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; + bool add_duplicate = shash_find(&ctx->options, "--add-duplicate") != NULL; ++ bool empty_backend_rej = shash_find(&ctx->options, "--reject") != NULL; ++ bool empty_backend_event = shash_find(&ctx->options, "--event") != NULL; ++ ++ if (empty_backend_event && empty_backend_rej) { ++ ctl_error(ctx, ++ "--reject and --event can't specified at the same time"); ++ return; ++ } + + const char *lb_proto; + bool is_update_proto = false; +@@ -2934,6 +2956,14 @@ nbctl_lb_add(struct ctl_context *ctx) + smap_add(CONST_CAST(struct smap *, &lb->vips), + lb_vip_normalized, ds_cstr(&lb_ips_new)); + nbrec_load_balancer_set_vips(lb, &lb->vips); ++ if (empty_backend_rej) { ++ const struct smap options = SMAP_CONST1(&options, "reject", "true"); ++ nbrec_load_balancer_set_options(lb, &options); ++ } ++ if (empty_backend_event) { ++ const struct smap options = SMAP_CONST1(&options, "event", "true"); ++ nbrec_load_balancer_set_options(lb, 
&options); ++ } + out: + ds_destroy(&lb_ips_new); + +@@ -3115,17 +3145,7 @@ nbctl_lr_lb_add(struct ctl_context *ctx) + } + + /* Insert the load balancer into the logical router. */ +- nbrec_logical_router_verify_load_balancer(lr); +- struct nbrec_load_balancer **new_lbs +- = xmalloc(sizeof *new_lbs * (lr->n_load_balancer + 1)); +- +- nullable_memcpy(new_lbs, lr->load_balancer, +- sizeof *new_lbs * lr->n_load_balancer); +- new_lbs[lr->n_load_balancer] = CONST_CAST(struct nbrec_load_balancer *, +- new_lb); +- nbrec_logical_router_set_load_balancer(lr, new_lbs, +- lr->n_load_balancer + 1); +- free(new_lbs); ++ nbrec_logical_router_update_load_balancer_addvalue(lr, new_lb); + } + + static void +@@ -3158,15 +3178,7 @@ nbctl_lr_lb_del(struct ctl_context *ctx) + + if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { + /* Remove the matching rule. */ +- nbrec_logical_router_verify_load_balancer(lr); +- +- struct nbrec_load_balancer **new_lbs +- = xmemdup(lr->load_balancer, +- sizeof *new_lbs * lr->n_load_balancer); +- new_lbs[i] = lr->load_balancer[lr->n_load_balancer - 1]; +- nbrec_logical_router_set_load_balancer(lr, new_lbs, +- lr->n_load_balancer - 1); +- free(new_lbs); ++ nbrec_logical_router_update_load_balancer_delvalue(lr, lb); + return; + } + } +@@ -3240,17 +3252,7 @@ nbctl_ls_lb_add(struct ctl_context *ctx) + } + + /* Insert the load balancer into the logical switch. */ +- nbrec_logical_switch_verify_load_balancer(ls); +- struct nbrec_load_balancer **new_lbs +- = xmalloc(sizeof *new_lbs * (ls->n_load_balancer + 1)); +- +- nullable_memcpy(new_lbs, ls->load_balancer, +- sizeof *new_lbs * ls->n_load_balancer); +- new_lbs[ls->n_load_balancer] = CONST_CAST(struct nbrec_load_balancer *, +- new_lb); +- nbrec_logical_switch_set_load_balancer(ls, new_lbs, +- ls->n_load_balancer + 1); +- free(new_lbs); ++ nbrec_logical_switch_update_load_balancer_addvalue(ls, new_lb); + } + + static void +@@ -3283,15 +3285,7 @@ nbctl_ls_lb_del(struct ctl_context *ctx) + + if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { + /* Remove the matching rule. */ +- nbrec_logical_switch_verify_load_balancer(ls); +- +- struct nbrec_load_balancer **new_lbs +- = xmemdup(ls->load_balancer, +- sizeof *new_lbs * ls->n_load_balancer); +- new_lbs[i] = ls->load_balancer[ls->n_load_balancer - 1]; +- nbrec_logical_switch_set_load_balancer(ls, new_lbs, +- ls->n_load_balancer - 1); +- free(new_lbs); ++ nbrec_logical_switch_update_load_balancer_delvalue(ls, lb); + return; + } + } +@@ -3378,6 +3372,7 @@ static void + nbctl_lr_del(struct ctl_context *ctx) + { + bool must_exist = !shash_find(&ctx->options, "--if-exists"); ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const char *id = ctx->argv[1]; + const struct nbrec_logical_router *lr = NULL; + +@@ -3390,6 +3385,11 @@ nbctl_lr_del(struct ctl_context *ctx) + return; + } + ++ /* Updating runtime cache. */ ++ for (size_t i = 0; i < lr->n_ports; i++) { ++ shash_find_and_delete(&nbctx->lrp_to_lr_map, lr->ports[i]->name); ++ } ++ + nbrec_logical_router_delete(lr); + } + +@@ -3645,7 +3645,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + return; + } + const char *action = ctx->argv[4]; +- char *next_hop = NULL; ++ size_t n_nexthops = 0; ++ char **nexthops = NULL; + + bool reroute = false; + /* Validate action. */ +@@ -3665,7 +3666,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + /* Check if same routing policy already exists. 
+ * A policy is uniquely identified by priority and match */ + bool may_exist = !!shash_find(&ctx->options, "--may-exist"); +- for (int i = 0; i < lr->n_policies; i++) { ++ size_t i; ++ for (i = 0; i < lr->n_policies; i++) { + const struct nbrec_logical_router_policy *policy = lr->policies[i]; + if (policy->priority == priority && + !strcmp(policy->match, ctx->argv[3])) { +@@ -3676,12 +3678,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + return; + } + } ++ + if (reroute) { +- next_hop = normalize_prefix_str(ctx->argv[5]); +- if (!next_hop) { +- ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); +- return; ++ char *nexthops_arg = xstrdup(ctx->argv[5]); ++ char *save_ptr, *next_hop, *token; ++ ++ n_nexthops = 0; ++ size_t n_allocs = 0; ++ ++ bool nexthops_is_ipv4 = true; ++ for (token = strtok_r(nexthops_arg, ",", &save_ptr); ++ token != NULL; token = strtok_r(NULL, ",", &save_ptr)) { ++ next_hop = normalize_addr_str(token); ++ ++ if (!next_hop) { ++ ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); ++ free(nexthops_arg); ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); ++ } ++ free(nexthops); ++ return; ++ } ++ if (n_nexthops == n_allocs) { ++ nexthops = x2nrealloc(nexthops, &n_allocs, sizeof *nexthops); ++ } ++ ++ bool is_ipv4 = strchr(next_hop, '.') ? true : false; ++ if (n_nexthops == 0) { ++ nexthops_is_ipv4 = is_ipv4; ++ } ++ ++ if (is_ipv4 != nexthops_is_ipv4) { ++ ctl_error(ctx, "bad next hops argument, not in the same " ++ "addr family : %s", ctx->argv[5]); ++ free(nexthops_arg); ++ free(next_hop); ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); ++ } ++ free(nexthops); ++ return; ++ } ++ nexthops[n_nexthops] = next_hop; ++ n_nexthops++; + } ++ ++ free(nexthops_arg); + } + + struct nbrec_logical_router_policy *policy; +@@ -3690,12 +3733,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + nbrec_logical_router_policy_set_match(policy, ctx->argv[3]); + nbrec_logical_router_policy_set_action(policy, action); + if (reroute) { +- nbrec_logical_router_policy_set_nexthop(policy, next_hop); ++ nbrec_logical_router_policy_set_nexthops( ++ policy, (const char **)nexthops, n_nexthops); + } + + /* Parse the options. */ + struct smap options = SMAP_INITIALIZER(&options); +- for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) { ++ for (i = reroute ? 
6 : 5; i < ctx->argc; i++) { + char *key, *value; + value = xstrdup(ctx->argv[i]); + key = strsep(&value, "="); +@@ -3705,7 +3749,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + ctl_error(ctx, "No value specified for the option : %s", key); + smap_destroy(&options); + free(key); +- free(next_hop); ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); ++ } ++ free(nexthops); + return; + } + free(key); +@@ -3713,18 +3760,12 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + nbrec_logical_router_policy_set_options(policy, &options); + smap_destroy(&options); + +- nbrec_logical_router_verify_policies(lr); +- struct nbrec_logical_router_policy **new_policies +- = xmalloc(sizeof *new_policies * (lr->n_policies + 1)); +- memcpy(new_policies, lr->policies, +- sizeof *new_policies * lr->n_policies); +- new_policies[lr->n_policies] = policy; +- nbrec_logical_router_set_policies(lr, new_policies, +- lr->n_policies + 1); +- free(new_policies); +- if (next_hop != NULL) { +- free(next_hop); ++ nbrec_logical_router_update_policies_addvalue(lr, policy); ++ ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); + } ++ free(nexthops); + } + + static void +@@ -3758,38 +3799,34 @@ nbctl_lr_policy_del(struct ctl_context *ctx) + /* If uuid was specified, delete routing policy with the + * specified uuid. */ + if (ctx->argc == 3) { +- struct nbrec_logical_router_policy **new_policies +- = xmemdup(lr->policies, +- sizeof *new_policies * lr->n_policies); +- int n_policies = 0; ++ size_t i; + + if (lr_policy_uuid) { +- for (size_t i = 0; i < lr->n_policies; i++) { +- if (!uuid_equals(lr_policy_uuid, +- &(lr->policies[i]->header_.uuid))) { +- new_policies[n_policies++] = lr->policies[i]; ++ for (i = 0; i < lr->n_policies; i++) { ++ if (uuid_equals(lr_policy_uuid, ++ &(lr->policies[i]->header_.uuid))) { ++ nbrec_logical_router_update_policies_delvalue( ++ lr, lr->policies[i]); ++ break; + } + } +- if (n_policies == lr->n_policies) { ++ if (i == lr->n_policies) { + if (!shash_find(&ctx->options, "--if-exists")) { + ctl_error(ctx, "Logical router policy uuid is not found."); + } +- free(new_policies); + return; + } + +- /* If match is not specified, delete all routing policies with the +- * specified priority. */ ++ /* If match is not specified, delete all routing policies with the ++ * specified priority. 
*/ + } else { +- for (int i = 0; i < lr->n_policies; i++) { +- if (priority != lr->policies[i]->priority) { +- new_policies[n_policies++] = lr->policies[i]; ++ for (i = 0; i < lr->n_policies; i++) { ++ if (priority == lr->policies[i]->priority) { ++ nbrec_logical_router_update_policies_delvalue( ++ lr, lr->policies[i]); + } + } + } +- nbrec_logical_router_verify_policies(lr); +- nbrec_logical_router_set_policies(lr, new_policies, n_policies); +- free(new_policies); + return; + } + +@@ -3798,14 +3835,7 @@ nbctl_lr_policy_del(struct ctl_context *ctx) + struct nbrec_logical_router_policy *routing_policy = lr->policies[i]; + if (priority == routing_policy->priority && + !strcmp(ctx->argv[3], routing_policy->match)) { +- struct nbrec_logical_router_policy **new_policies +- = xmemdup(lr->policies, +- sizeof *new_policies * lr->n_policies); +- new_policies[i] = lr->policies[lr->n_policies - 1]; +- nbrec_logical_router_verify_policies(lr); +- nbrec_logical_router_set_policies(lr, new_policies, +- lr->n_policies - 1); +- free(new_policies); ++ nbrec_logical_router_update_policies_delvalue(lr, routing_policy); + return; + } + } +@@ -3884,6 +3914,47 @@ nbctl_lr_policy_list(struct ctl_context *ctx) + } + free(policies); + } ++ ++static struct nbrec_logical_router_static_route * ++nbctl_lr_get_route(const struct nbrec_logical_router *lr, char *prefix, ++ char *next_hop, bool is_src_route, bool ecmp) ++{ ++ for (int i = 0; i < lr->n_static_routes; i++) { ++ struct nbrec_logical_router_static_route *route = lr->static_routes[i]; ++ ++ /* Compare route policy. */ ++ char *nb_policy = route->policy; ++ bool nb_is_src_route = false; ++ if (nb_policy && !strcmp(nb_policy, "src-ip")) { ++ nb_is_src_route = true; ++ } ++ if (is_src_route != nb_is_src_route) { ++ continue; ++ } ++ ++ /* Compare route prefix. */ ++ char *rt_prefix = normalize_prefix_str(route->ip_prefix); ++ if (!rt_prefix) { ++ /* Ignore existing prefix we couldn't parse. */ ++ continue; ++ } ++ ++ if (strcmp(rt_prefix, prefix)) { ++ free(rt_prefix); ++ continue; ++ } ++ ++ if (ecmp && strcmp(next_hop, route->nexthop)) { ++ free(rt_prefix); ++ continue; ++ } ++ ++ free(rt_prefix); ++ return route; ++ } ++ return NULL; ++} ++ + + static void + nbctl_lr_route_add(struct ctl_context *ctx) +@@ -3927,44 +3998,42 @@ nbctl_lr_route_add(struct ctl_context *ctx) + goto cleanup; + } + ++ struct shash_node *bfd = shash_find(&ctx->options, "--bfd"); ++ const struct nbrec_bfd *nb_bt = NULL; ++ if (bfd) { ++ if (bfd->data) { ++ struct uuid bfd_uuid; ++ if (uuid_from_string(&bfd_uuid, bfd->data)) { ++ nb_bt = nbrec_bfd_get_for_uuid(ctx->idl, &bfd_uuid); ++ } ++ if (!nb_bt) { ++ ctl_error(ctx, "no entry found in the BFD table"); ++ goto cleanup; ++ } ++ } else { ++ const struct nbrec_bfd *iter; ++ NBREC_BFD_FOR_EACH (iter, ctx->idl) { ++ if (!strcmp(iter->dst_ip, next_hop)) { ++ nb_bt = iter; ++ break; ++ } ++ } ++ } ++ } ++ + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; + bool ecmp_symmetric_reply = shash_find(&ctx->options, + "--ecmp-symmetric-reply") != NULL; + bool ecmp = shash_find(&ctx->options, "--ecmp") != NULL || + ecmp_symmetric_reply; ++ struct nbrec_logical_router_static_route *route = ++ nbctl_lr_get_route(lr, prefix, next_hop, is_src_route, ecmp); + if (!ecmp) { +- for (int i = 0; i < lr->n_static_routes; i++) { +- const struct nbrec_logical_router_static_route *route +- = lr->static_routes[i]; +- char *rt_prefix; +- +- /* Compare route policy. 
*/ +- char *nb_policy = lr->static_routes[i]->policy; +- bool nb_is_src_route = false; +- if (nb_policy && !strcmp(nb_policy, "src-ip")) { +- nb_is_src_route = true; +- } +- if (is_src_route != nb_is_src_route) { +- continue; +- } +- +- /* Compare route prefix. */ +- rt_prefix = normalize_prefix_str(lr->static_routes[i]->ip_prefix); +- if (!rt_prefix) { +- /* Ignore existing prefix we couldn't parse. */ +- continue; +- } +- +- if (strcmp(rt_prefix, prefix)) { +- free(rt_prefix); +- continue; +- } +- ++ if (route) { + if (!may_exist) { + ctl_error(ctx, "duplicate prefix: %s (policy: %s). Use option" + " --ecmp to allow this for ECMP routing.", + prefix, is_src_route ? "src-ip" : "dst-ip"); +- free(rt_prefix); + goto cleanup; + } + +@@ -3981,12 +4050,25 @@ nbctl_lr_route_add(struct ctl_context *ctx) + if (policy) { + nbrec_logical_router_static_route_set_policy(route, policy); + } +- free(rt_prefix); ++ if (bfd) { ++ if (!nb_bt) { ++ if (ctx->argc != 5) { ++ ctl_error(ctx, "insert entry in the BFD table failed"); ++ goto cleanup; ++ } ++ nb_bt = nbrec_bfd_insert(ctx->txn); ++ nbrec_bfd_set_dst_ip(nb_bt, next_hop); ++ nbrec_bfd_set_logical_port(nb_bt, ctx->argv[4]); ++ } ++ nbrec_logical_router_static_route_set_bfd(route, nb_bt); ++ } + goto cleanup; + } ++ } else if (route) { ++ ctl_error(ctx, "duplicate nexthop for the same ECMP route"); ++ goto cleanup; + } + +- struct nbrec_logical_router_static_route *route; + route = nbrec_logical_router_static_route_insert(ctx->txn); + nbrec_logical_router_static_route_set_ip_prefix(route, prefix); + nbrec_logical_router_static_route_set_nexthop(route, next_hop); +@@ -4004,15 +4086,19 @@ nbctl_lr_route_add(struct ctl_context *ctx) + nbrec_logical_router_static_route_set_options(route, &options); + } + +- nbrec_logical_router_verify_static_routes(lr); +- struct nbrec_logical_router_static_route **new_routes +- = xmalloc(sizeof *new_routes * (lr->n_static_routes + 1)); +- nullable_memcpy(new_routes, lr->static_routes, +- sizeof *new_routes * lr->n_static_routes); +- new_routes[lr->n_static_routes] = route; +- nbrec_logical_router_set_static_routes(lr, new_routes, +- lr->n_static_routes + 1); +- free(new_routes); ++ nbrec_logical_router_update_static_routes_addvalue(lr, route); ++ if (bfd) { ++ if (!nb_bt) { ++ if (ctx->argc != 5) { ++ ctl_error(ctx, "insert entry in the BFD table failed"); ++ goto cleanup; ++ } ++ nb_bt = nbrec_bfd_insert(ctx->txn); ++ nbrec_bfd_set_dst_ip(nb_bt, next_hop); ++ nbrec_bfd_set_logical_port(nb_bt, ctx->argv[4]); ++ } ++ nbrec_logical_router_static_route_set_bfd(route, nb_bt); ++ } + + cleanup: + free(next_hop); +@@ -4069,11 +4155,8 @@ nbctl_lr_route_del(struct ctl_context *ctx) + output_port = ctx->argv[4]; + } + +- struct nbrec_logical_router_static_route **new_routes +- = xmemdup(lr->static_routes, +- sizeof *new_routes * lr->n_static_routes); +- size_t n_new = 0; +- for (int i = 0; i < lr->n_static_routes; i++) { ++ size_t n_removed = 0; ++ for (size_t i = 0; i < lr->n_static_routes; i++) { + /* Compare route policy, if specified. */ + if (policy) { + char *nb_policy = lr->static_routes[i]->policy; +@@ -4082,7 +4165,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) + nb_is_src_route = true; + } + if (is_src_route != nb_is_src_route) { +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + } +@@ -4093,14 +4175,12 @@ nbctl_lr_route_del(struct ctl_context *ctx) + normalize_prefix_str(lr->static_routes[i]->ip_prefix); + if (!rt_prefix) { + /* Ignore existing prefix we couldn't parse. 
*/ +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + + int ret = strcmp(prefix, rt_prefix); + free(rt_prefix); + if (ret) { +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + } +@@ -4111,13 +4191,11 @@ nbctl_lr_route_del(struct ctl_context *ctx) + normalize_prefix_str(lr->static_routes[i]->nexthop); + if (!rt_nexthop) { + /* Ignore existing nexthop we couldn't parse. */ +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + int ret = strcmp(nexthop, rt_nexthop); + free(rt_nexthop); + if (ret) { +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + } +@@ -4126,18 +4204,17 @@ nbctl_lr_route_del(struct ctl_context *ctx) + if (output_port) { + char *rt_output_port = lr->static_routes[i]->output_port; + if (!rt_output_port || strcmp(output_port, rt_output_port)) { +- new_routes[n_new++] = lr->static_routes[i]; ++ continue; + } + } +- } + +- if (n_new < lr->n_static_routes) { +- nbrec_logical_router_verify_static_routes(lr); +- nbrec_logical_router_set_static_routes(lr, new_routes, n_new); +- goto out; ++ /* Everything matched. Removing. */ ++ nbrec_logical_router_update_static_routes_delvalue( ++ lr, lr->static_routes[i]); ++ n_removed++; + } + +- if (!shash_find(&ctx->options, "--if-exists")) { ++ if (!n_removed && !shash_find(&ctx->options, "--if-exists")) { + ctl_error(ctx, "no matching route: policy '%s', prefix '%s', nexthop " + "'%s', output_port '%s'.", + policy ? policy : "any", +@@ -4146,8 +4223,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) + output_port ? output_port : "any"); + } + +-out: +- free(new_routes); + free(prefix); + free(nexthop); + } +@@ -4418,12 +4493,7 @@ nbctl_lr_nat_add(struct ctl_context *ctx) + smap_destroy(&nat_options); + + /* Insert the NAT into the logical router. */ +- nbrec_logical_router_verify_nat(lr); +- struct nbrec_nat **new_nats = xmalloc(sizeof *new_nats * (lr->n_nat + 1)); +- nullable_memcpy(new_nats, lr->nat, sizeof *new_nats * lr->n_nat); +- new_nats[lr->n_nat] = nat; +- nbrec_logical_router_set_nat(lr, new_nats, lr->n_nat + 1); +- free(new_nats); ++ nbrec_logical_router_update_nat_addvalue(lr, nat); + + cleanup: + free(new_logical_ip); +@@ -4459,17 +4529,11 @@ nbctl_lr_nat_del(struct ctl_context *ctx) + + if (ctx->argc == 3) { + /*Deletes all NATs with the specified type. */ +- struct nbrec_nat **new_nats = xmalloc(sizeof *new_nats * lr->n_nat); +- int n_nat = 0; + for (size_t i = 0; i < lr->n_nat; i++) { +- if (strcmp(nat_type, lr->nat[i]->type)) { +- new_nats[n_nat++] = lr->nat[i]; ++ if (!strcmp(nat_type, lr->nat[i]->type)) { ++ nbrec_logical_router_update_nat_delvalue(lr, lr->nat[i]); + } + } +- +- nbrec_logical_router_verify_nat(lr); +- nbrec_logical_router_set_nat(lr, new_nats, n_nat); +- free(new_nats); + return; + } + +@@ -4491,13 +4555,7 @@ nbctl_lr_nat_del(struct ctl_context *ctx) + continue; + } + if (!strcmp(nat_type, nat->type) && !strcmp(nat_ip, old_ip)) { +- struct nbrec_nat **new_nats +- = xmemdup(lr->nat, sizeof *new_nats * lr->n_nat); +- new_nats[i] = lr->nat[lr->n_nat - 1]; +- nbrec_logical_router_verify_nat(lr); +- nbrec_logical_router_set_nat(lr, new_nats, +- lr->n_nat - 1); +- free(new_nats); ++ nbrec_logical_router_update_nat_delvalue(lr, nat); + should_return = true; + } + free(old_ip); +@@ -4667,20 +4725,18 @@ lrp_by_name_or_uuid(struct ctl_context *ctx, const char *id, bool must_exist, + + /* Returns the logical router that contains 'lrp'. 
*/ + static char * OVS_WARN_UNUSED_RESULT +-lrp_to_lr(const struct ovsdb_idl *idl, ++lrp_to_lr(struct ctl_context *ctx, + const struct nbrec_logical_router_port *lrp, + const struct nbrec_logical_router **lr_p) + { ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const struct nbrec_logical_router *lr; + *lr_p = NULL; + +- NBREC_LOGICAL_ROUTER_FOR_EACH (lr, idl) { +- for (size_t i = 0; i < lr->n_ports; i++) { +- if (lr->ports[i] == lrp) { +- *lr_p = lr; +- return NULL; +- } +- } ++ lr = shash_find_data(&nbctx->lrp_to_lr_map, lrp->name); ++ if (lr) { ++ *lr_p = lr; ++ return NULL; + } + + /* Can't happen because of the database schema */ +@@ -4777,15 +4833,7 @@ nbctl_lrp_set_gateway_chassis(struct ctl_context *ctx) + nbrec_gateway_chassis_set_priority(gc, priority); + + /* Insert the logical gateway chassis into the logical router port. */ +- nbrec_logical_router_port_verify_gateway_chassis(lrp); +- struct nbrec_gateway_chassis **new_gc = xmalloc( +- sizeof *new_gc * (lrp->n_gateway_chassis + 1)); +- nullable_memcpy(new_gc, lrp->gateway_chassis, +- sizeof *new_gc * lrp->n_gateway_chassis); +- new_gc[lrp->n_gateway_chassis] = gc; +- nbrec_logical_router_port_set_gateway_chassis( +- lrp, new_gc, lrp->n_gateway_chassis + 1); +- free(new_gc); ++ nbrec_logical_router_port_update_gateway_chassis_addvalue(lrp, gc); + free(gc_name); + } + +@@ -4802,14 +4850,7 @@ remove_gc(const struct nbrec_logical_router_port *lrp, size_t idx) + * will actually cause the gateway chassis to be deleted when the + * transaction is sent to the database server (due to garbage + * collection). */ +- struct nbrec_gateway_chassis **new_gc +- = xmemdup(lrp->gateway_chassis, +- sizeof *new_gc * lrp->n_gateway_chassis); +- new_gc[idx] = new_gc[lrp->n_gateway_chassis - 1]; +- nbrec_logical_router_port_verify_gateway_chassis(lrp); +- nbrec_logical_router_port_set_gateway_chassis( +- lrp, new_gc, lrp->n_gateway_chassis - 1); +- free(new_gc); ++ nbrec_logical_router_port_update_gateway_chassis_delvalue(lrp, gc); + } + + /* Delete 'gc' from the IDL. This won't have a real effect on +@@ -4893,6 +4934,7 @@ static void + nbctl_lrp_add(struct ctl_context *ctx) + { + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + + const struct nbrec_logical_router *lr = NULL; + char *error = lr_by_name_or_uuid(ctx, ctx->argv[1], true, &lr); +@@ -4942,7 +4984,7 @@ nbctl_lrp_add(struct ctl_context *ctx) + } + + const struct nbrec_logical_router *bound_lr; +- error = lrp_to_lr(ctx->idl, lrp, &bound_lr); ++ error = lrp_to_lr(ctx, lrp, &bound_lr); + if (error) { + ctx->error = error; + return; +@@ -5040,31 +5082,27 @@ nbctl_lrp_add(struct ctl_context *ctx) + } + + /* Insert the logical port into the logical router. */ +- nbrec_logical_router_verify_ports(lr); +- struct nbrec_logical_router_port **new_ports = xmalloc(sizeof *new_ports * +- (lr->n_ports + 1)); +- nullable_memcpy(new_ports, lr->ports, sizeof *new_ports * lr->n_ports); +- new_ports[lr->n_ports] = CONST_CAST(struct nbrec_logical_router_port *, +- lrp); +- nbrec_logical_router_set_ports(lr, new_ports, lr->n_ports + 1); +- free(new_ports); ++ nbrec_logical_router_update_ports_addvalue(lr, lrp); ++ ++ /* Updating runtime cache. */ ++ shash_add(&nbctx->lrp_to_lr_map, lrp->name, lr); + } + +-/* Removes logical router port 'lr->ports[idx]'. */ ++/* Removes logical router port 'lrp' from logical router 'lr'. 
*/ + static void +-remove_lrp(const struct nbrec_logical_router *lr, size_t idx) ++remove_lrp(struct ctl_context *ctx, ++ const struct nbrec_logical_router *lr, ++ const struct nbrec_logical_router_port *lrp) + { +- const struct nbrec_logical_router_port *lrp = lr->ports[idx]; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); ++ ++ /* Updating runtime cache. */ ++ shash_find_and_delete(&nbctx->lrp_to_lr_map, lrp->name); + + /* First remove 'lrp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is + * sent to the database server (due to garbage collection). */ +- struct nbrec_logical_router_port **new_ports +- = xmemdup(lr->ports, sizeof *new_ports * lr->n_ports); +- new_ports[idx] = new_ports[lr->n_ports - 1]; +- nbrec_logical_router_verify_ports(lr); +- nbrec_logical_router_set_ports(lr, new_ports, lr->n_ports - 1); +- free(new_ports); ++ nbrec_logical_router_update_ports_delvalue(lr, lrp); + + /* Delete 'lrp' from the IDL. This won't have a real effect on + * the database server (the IDL will suppress it in fact) but it +@@ -5090,18 +5128,13 @@ nbctl_lrp_del(struct ctl_context *ctx) + + /* Find the router that contains 'lrp', then delete it. */ + const struct nbrec_logical_router *lr; +- NBREC_LOGICAL_ROUTER_FOR_EACH (lr, ctx->idl) { +- for (size_t i = 0; i < lr->n_ports; i++) { +- if (lr->ports[i] == lrp) { +- remove_lrp(lr, i); +- return; +- } +- } +- } + +- /* Can't happen because of the database schema. */ +- ctl_error(ctx, "logical port %s is not part of any logical router", +- ctx->argv[1]); ++ error = lrp_to_lr(ctx, lrp, &lr); ++ if (error) { ++ ctx->error = error; ++ return; ++ } ++ remove_lrp(ctx, lr, lrp); + } + + /* Print a list of logical router ports. */ +@@ -5275,7 +5308,7 @@ fwd_group_to_logical_switch(struct ctl_context *ctx, + } + + const struct nbrec_logical_switch *ls; +- error = lsp_to_ls(ctx->idl, lsp, &ls); ++ error = lsp_to_ls(ctx, lsp, &ls); + if (error) { + ctx->error = error; + return NULL; +@@ -5350,7 +5383,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) + return; + } + if (lsp) { +- error = lsp_to_ls(ctx->idl, lsp, &ls); ++ error = lsp_to_ls(ctx, lsp, &ls); + if (error) { + ctx->error = error; + return; +@@ -5373,15 +5406,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) + nbrec_forwarding_group_set_liveness(fwd_group, true); + } + +- struct nbrec_forwarding_group **new_fwd_groups = +- xmalloc(sizeof(*new_fwd_groups) * (ls->n_forwarding_groups + 1)); +- memcpy(new_fwd_groups, ls->forwarding_groups, +- sizeof *new_fwd_groups * ls->n_forwarding_groups); +- new_fwd_groups[ls->n_forwarding_groups] = fwd_group; +- nbrec_logical_switch_set_forwarding_groups(ls, new_fwd_groups, +- (ls->n_forwarding_groups + 1)); +- free(new_fwd_groups); +- ++ nbrec_logical_switch_update_forwarding_groups_addvalue(ls, fwd_group); + } + + static void +@@ -5403,14 +5428,8 @@ nbctl_fwd_group_del(struct ctl_context *ctx) + + for (int i = 0; i < ls->n_forwarding_groups; ++i) { + if (!strcmp(ls->forwarding_groups[i]->name, fwd_group->name)) { +- struct nbrec_forwarding_group **new_fwd_groups = +- xmemdup(ls->forwarding_groups, +- sizeof *new_fwd_groups * ls->n_forwarding_groups); +- new_fwd_groups[i] = +- ls->forwarding_groups[ls->n_forwarding_groups - 1]; +- nbrec_logical_switch_set_forwarding_groups(ls, new_fwd_groups, +- (ls->n_forwarding_groups - 1)); +- free(new_fwd_groups); ++ nbrec_logical_switch_update_forwarding_groups_delvalue( ++ ls, ls->forwarding_groups[i]); + nbrec_forwarding_group_delete(fwd_group); + 
return; + } +@@ -5498,17 +5517,27 @@ struct ipv4_route { + const struct nbrec_logical_router_static_route *route; + }; + ++static int ++__ipv4_route_cmp(const struct ipv4_route *r1, const struct ipv4_route *r2) ++{ ++ if (r1->priority != r2->priority) { ++ return r1->priority > r2->priority ? -1 : 1; ++ } ++ if (r1->addr != r2->addr) { ++ return ntohl(r1->addr) < ntohl(r2->addr) ? -1 : 1; ++ } ++ return 0; ++} ++ + static int + ipv4_route_cmp(const void *route1_, const void *route2_) + { + const struct ipv4_route *route1p = route1_; + const struct ipv4_route *route2p = route2_; + +- if (route1p->priority != route2p->priority) { +- return route1p->priority > route2p->priority ? -1 : 1; +- } +- if (route1p->addr != route2p->addr) { +- return ntohl(route1p->addr) < ntohl(route2p->addr) ? -1 : 1; ++ int ret = __ipv4_route_cmp(route1p, route2p); ++ if (ret) { ++ return ret; + } + return route_cmp_details(route1p->route, route2p->route); + } +@@ -5519,16 +5548,22 @@ struct ipv6_route { + const struct nbrec_logical_router_static_route *route; + }; + ++static int ++__ipv6_route_cmp(const struct ipv6_route *r1, const struct ipv6_route *r2) ++{ ++ if (r1->priority != r2->priority) { ++ return r1->priority > r2->priority ? -1 : 1; ++ } ++ return memcmp(&r1->addr, &r2->addr, sizeof(r1->addr)); ++} ++ + static int + ipv6_route_cmp(const void *route1_, const void *route2_) + { + const struct ipv6_route *route1p = route1_; + const struct ipv6_route *route2p = route2_; + +- if (route1p->priority != route2p->priority) { +- return route1p->priority > route2p->priority ? -1 : 1; +- } +- int ret = memcmp(&route1p->addr, &route2p->addr, sizeof(route1p->addr)); ++ int ret = __ipv6_route_cmp(route1p, route2p); + if (ret) { + return ret; + } +@@ -5536,7 +5571,8 @@ ipv6_route_cmp(const void *route1_, const void *route2_) + } + + static void +-print_route(const struct nbrec_logical_router_static_route *route, struct ds *s) ++print_route(const struct nbrec_logical_router_static_route *route, ++ struct ds *s, bool ecmp) + { + + char *prefix = normalize_prefix_str(route->ip_prefix); +@@ -5558,6 +5594,19 @@ print_route(const struct nbrec_logical_router_static_route *route, struct ds *s) + if (smap_get(&route->external_ids, "ic-learned-route")) { + ds_put_format(s, " (learned)"); + } ++ ++ if (ecmp) { ++ ds_put_cstr(s, " ecmp"); ++ } ++ ++ if (smap_get_bool(&route->options, "ecmp_symmetric_reply", false)) { ++ ds_put_cstr(s, " ecmp-symmetric-reply"); ++ } ++ ++ if (route->bfd) { ++ ds_put_cstr(s, " bfd"); ++ } ++ + ds_put_char(s, '\n'); + } + +@@ -5623,7 +5672,16 @@ nbctl_lr_route_list(struct ctl_context *ctx) + ds_put_cstr(&ctx->output, "IPv4 Routes\n"); + } + for (int i = 0; i < n_ipv4_routes; i++) { +- print_route(ipv4_routes[i].route, &ctx->output); ++ bool ecmp = false; ++ if (i < n_ipv4_routes - 1 && ++ !__ipv4_route_cmp(&ipv4_routes[i], &ipv4_routes[i + 1])) { ++ ecmp = true; ++ } else if (i > 0 && ++ !__ipv4_route_cmp(&ipv4_routes[i], ++ &ipv4_routes[i - 1])) { ++ ecmp = true; ++ } ++ print_route(ipv4_routes[i].route, &ctx->output, ecmp); + } + + if (n_ipv6_routes) { +@@ -5631,7 +5689,16 @@ nbctl_lr_route_list(struct ctl_context *ctx) + n_ipv4_routes ? 
"\n" : ""); + } + for (int i = 0; i < n_ipv6_routes; i++) { +- print_route(ipv6_routes[i].route, &ctx->output); ++ bool ecmp = false; ++ if (i < n_ipv6_routes - 1 && ++ !__ipv6_route_cmp(&ipv6_routes[i], &ipv6_routes[i + 1])) { ++ ecmp = true; ++ } else if (i > 0 && ++ !__ipv6_route_cmp(&ipv6_routes[i], ++ &ipv6_routes[i - 1])) { ++ ecmp = true; ++ } ++ print_route(ipv6_routes[i].route, &ctx->output, ecmp); + } + + free(ipv4_routes); +@@ -6007,17 +6074,7 @@ cmd_ha_ch_grp_add_chassis(struct ctl_context *ctx) + nbrec_ha_chassis_set_chassis_name(ha_chassis, chassis_name); + nbrec_ha_chassis_set_priority(ha_chassis, priority); + +- nbrec_ha_chassis_group_verify_ha_chassis(ha_ch_grp); +- +- struct nbrec_ha_chassis **new_ha_chs = +- xmalloc(sizeof *new_ha_chs * (ha_ch_grp->n_ha_chassis + 1)); +- nullable_memcpy(new_ha_chs, ha_ch_grp->ha_chassis, +- sizeof *new_ha_chs * ha_ch_grp->n_ha_chassis); +- new_ha_chs[ha_ch_grp->n_ha_chassis] = +- CONST_CAST(struct nbrec_ha_chassis *, ha_chassis); +- nbrec_ha_chassis_group_set_ha_chassis(ha_ch_grp, new_ha_chs, +- ha_ch_grp->n_ha_chassis + 1); +- free(new_ha_chs); ++ nbrec_ha_chassis_group_update_ha_chassis_addvalue(ha_ch_grp, ha_chassis); + } + + static void +@@ -6032,11 +6089,9 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) + + const char *chassis_name = ctx->argv[2]; + struct nbrec_ha_chassis *ha_chassis = NULL; +- size_t idx = 0; + for (size_t i = 0; i < ha_ch_grp->n_ha_chassis; i++) { + if (!strcmp(ha_ch_grp->ha_chassis[i]->chassis_name, chassis_name)) { + ha_chassis = ha_ch_grp->ha_chassis[i]; +- idx = i; + break; + } + } +@@ -6047,14 +6102,7 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) + return; + } + +- struct nbrec_ha_chassis **new_ha_ch +- = xmemdup(ha_ch_grp->ha_chassis, +- sizeof *new_ha_ch * ha_ch_grp->n_ha_chassis); +- new_ha_ch[idx] = new_ha_ch[ha_ch_grp->n_ha_chassis - 1]; +- nbrec_ha_chassis_group_verify_ha_chassis(ha_ch_grp); +- nbrec_ha_chassis_group_set_ha_chassis(ha_ch_grp, new_ha_ch, +- ha_ch_grp->n_ha_chassis - 1); +- free(new_ha_ch); ++ nbrec_ha_chassis_group_update_ha_chassis_delvalue(ha_ch_grp, ha_chassis); + nbrec_ha_chassis_delete(ha_chassis); + } + +@@ -6231,7 +6279,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + struct ovsdb_idl_txn *txn; + enum ovsdb_idl_txn_status status; + struct ovsdb_symbol_table *symtab; +- struct ctl_context ctx; ++ struct nbctl_context ctx; + struct ctl_command *c; + struct shash_node *node; + int64_t next_cfg = 0; +@@ -6268,25 +6316,26 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + ds_init(&c->output); + c->table = NULL; + } +- ctl_context_init(&ctx, NULL, idl, txn, symtab, NULL); ++ nbctl_context_init(&ctx); ++ ctl_context_init(&ctx.base, NULL, idl, txn, symtab, NULL); + for (c = commands; c < &commands[n_commands]; c++) { +- ctl_context_init_command(&ctx, c); ++ ctl_context_init_command(&ctx.base, c); + if (c->syntax->run) { +- (c->syntax->run)(&ctx); ++ (c->syntax->run)(&ctx.base); + } +- if (ctx.error) { +- error = xstrdup(ctx.error); +- ctl_context_done(&ctx, c); ++ if (ctx.base.error) { ++ error = xstrdup(ctx.base.error); ++ ctl_context_done(&ctx.base, c); + goto out_error; + } +- ctl_context_done_command(&ctx, c); ++ ctl_context_done_command(&ctx.base, c); + +- if (ctx.try_again) { +- ctl_context_done(&ctx, NULL); ++ if (ctx.base.try_again) { ++ ctl_context_done(&ctx.base, NULL); + goto try_again; + } + } +- ctl_context_done(&ctx, NULL); ++ ctl_context_done(&ctx.base, NULL); + + SHASH_FOR_EACH (node, 
&symtab->sh) { + struct ovsdb_symbol *symbol = node->data; +@@ -6317,14 +6366,14 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + if (status == TXN_UNCHANGED || status == TXN_SUCCESS) { + for (c = commands; c < &commands[n_commands]; c++) { + if (c->syntax->postprocess) { +- ctl_context_init(&ctx, c, idl, txn, symtab, NULL); +- (c->syntax->postprocess)(&ctx); +- if (ctx.error) { +- error = xstrdup(ctx.error); +- ctl_context_done(&ctx, c); ++ ctl_context_init(&ctx.base, c, idl, txn, symtab, NULL); ++ (c->syntax->postprocess)(&ctx.base); ++ if (ctx.base.error) { ++ error = xstrdup(ctx.base.error); ++ ctl_context_done(&ctx.base, c); + goto out_error; + } +- ctl_context_done(&ctx, c); ++ ctl_context_done(&ctx.base, c); + } + } + } +@@ -6412,6 +6461,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + done: ; + } + ++ nbctl_context_destroy(&ctx); + ovsdb_symbol_table_destroy(symtab); + ovsdb_idl_txn_destroy(txn); + the_idl_txn = NULL; +@@ -6429,6 +6479,7 @@ out_error: + ovsdb_idl_txn_destroy(txn); + the_idl_txn = NULL; + ++ nbctl_context_destroy(&ctx); + ovsdb_symbol_table_destroy(symtab); + return error; + } +@@ -6561,7 +6612,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { + /* logical router route commands. */ + { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", NULL, + nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply," +- "--policy=", RW }, ++ "--policy=,--bfd?", RW }, + { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", NULL, + nbctl_lr_route_del, NULL, "--if-exists,--policy=", RW }, + { "lr-route-list", 1, 1, "ROUTER", NULL, nbctl_lr_route_list, NULL, +@@ -6588,7 +6639,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { + nbctl_lr_nat_set_ext_ips, NULL, "--is-exempted", RW}, + /* load balancer commands. */ + { "lb-add", 3, 4, "LB VIP[:PORT] IP[:PORT]... [PROTOCOL]", NULL, +- nbctl_lb_add, NULL, "--may-exist,--add-duplicate", RW }, ++ nbctl_lb_add, NULL, "--may-exist,--add-duplicate,--reject,--event", RW }, + { "lb-del", 1, 2, "LB [VIP]", NULL, nbctl_lb_del, NULL, + "--if-exists", RW }, + { "lb-list", 0, 1, "[LB]", NULL, nbctl_lb_list, NULL, "", RO }, +diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c +index 0a1b9ffdc..c38e8ec3b 100644 +--- a/utilities/ovn-sbctl.c ++++ b/utilities/ovn-sbctl.c +@@ -526,6 +526,7 @@ pre_get_info(struct ctl_context *ctx) + ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_tunnel_key); + ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_chassis); + ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_datapath); ++ ovsdb_idl_add_column(ctx->idl, &sbrec_port_binding_col_up); + + ovsdb_idl_add_column(ctx->idl, &sbrec_logical_flow_col_logical_datapath); + ovsdb_idl_add_column(ctx->idl, &sbrec_logical_flow_col_logical_dp_group); +@@ -665,6 +666,7 @@ cmd_lsp_bind(struct ctl_context *ctx) + struct sbctl_chassis *sbctl_ch; + struct sbctl_port_binding *sbctl_bd; + char *lport_name, *ch_name; ++ bool up = true; + + /* port_binding must exist, chassis must exist! 
*/ + lport_name = ctx->argv[1]; +@@ -683,6 +685,7 @@ cmd_lsp_bind(struct ctl_context *ctx) + } + } + sbrec_port_binding_set_chassis(sbctl_bd->bd_cfg, sbctl_ch->ch_cfg); ++ sbrec_port_binding_set_up(sbctl_bd->bd_cfg, &up, 1); + sbctl_context_invalidate_cache(ctx); + } + +@@ -699,6 +702,7 @@ cmd_lsp_unbind(struct ctl_context *ctx) + sbctl_bd = find_port_binding(sbctl_ctx, lport_name, must_exist); + if (sbctl_bd) { + sbrec_port_binding_set_chassis(sbctl_bd->bd_cfg, NULL); ++ sbrec_port_binding_set_up(sbctl_bd->bd_cfg, NULL, 0); + } + } + +diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c +index 6fad36512..fb88bc06c 100644 +--- a/utilities/ovn-trace.c ++++ b/utilities/ovn-trace.c +@@ -405,6 +405,7 @@ struct ovntrace_datapath { + size_t n_flows, allocated_flows; + + struct hmap mac_bindings; /* Contains "struct ovntrace_mac_binding"s. */ ++ struct hmap fdbs; /* Contains "struct ovntrace_fdb"s. */ + + bool has_local_l3gateway; + }; +@@ -453,12 +454,24 @@ struct ovntrace_mac_binding { + struct eth_addr mac; + }; + ++struct ovntrace_fdb { ++ struct hmap_node node; ++ uint16_t port_key; ++ struct eth_addr mac; ++}; ++ + static inline uint32_t + hash_mac_binding(uint16_t port_key, const struct in6_addr *ip) + { + return hash_bytes(ip, sizeof *ip, port_key); + } + ++static inline uint32_t ++hash_fdb(const struct eth_addr *mac) ++{ ++ return hash_bytes(mac, sizeof *mac, 0); ++} ++ + /* Every ovntrace_datapath, by southbound Datapath_Binding record UUID. */ + static struct hmap datapaths; + +@@ -478,6 +491,7 @@ static struct shash port_groups; + static struct hmap dhcp_opts; /* Contains "struct gen_opts_map"s. */ + static struct hmap dhcpv6_opts; /* Contains "struct gen_opts_map"s. */ + static struct hmap nd_ra_opts; /* Contains "struct gen_opts_map"s. */ ++static struct controller_event_options event_opts; + + static struct ovntrace_datapath * + ovntrace_datapath_find_by_sb_uuid(const struct uuid *sb_uuid) +@@ -517,6 +531,18 @@ ovntrace_datapath_find_by_name(const char *name) + return match; + } + ++static struct ovntrace_datapath * ++ovntrace_datapath_find_by_key(uint32_t tunnel_key) ++{ ++ struct ovntrace_datapath *dp; ++ HMAP_FOR_EACH (dp, sb_uuid_node, &datapaths) { ++ if (dp->tunnel_key == tunnel_key) { ++ return dp; ++ } ++ } ++ return NULL; ++} ++ + static const struct ovntrace_port * + ovntrace_port_find_by_key(const struct ovntrace_datapath *dp, + uint16_t tunnel_key) +@@ -597,6 +623,20 @@ ovntrace_mac_binding_find_mac_ip(const struct ovntrace_datapath *dp, + return NULL; + } + ++static const struct ovntrace_fdb * ++ovntrace_fdb_find(const struct ovntrace_datapath *dp, ++ const struct eth_addr *mac) ++{ ++ const struct ovntrace_fdb *fdb; ++ HMAP_FOR_EACH_WITH_HASH (fdb, node, hash_fdb(mac), ++ &dp->fdbs) { ++ if (eth_addr_equals(fdb->mac, *mac)) { ++ return fdb; ++ } ++ } ++ return NULL; ++} ++ + /* If 's' ends with a UUID, returns a copy of it with the UUID truncated to + * just the first 6 characters; otherwise, returns a copy of 's'. */ + static char * +@@ -637,7 +677,7 @@ read_datapaths(void) + + ovs_list_init(&dp->mcgroups); + hmap_init(&dp->mac_bindings); +- ++ hmap_init(&dp->fdbs); + hmap_insert(&datapaths, &dp->sb_uuid_node, uuid_hash(&dp->sb_uuid)); + } + } +@@ -901,10 +941,11 @@ parse_lflow_for_datapath(const struct sbrec_logical_flow *sblf, + .dhcp_opts = &dhcp_opts, + .dhcpv6_opts = &dhcpv6_opts, + .nd_ra_opts = &nd_ra_opts, ++ .controller_event_opts = &event_opts, + .pipeline = (!strcmp(sblf->pipeline, "ingress") + ? 
OVNACT_P_INGRESS + : OVNACT_P_EGRESS), +- .n_tables = 24, ++ .n_tables = LOG_PIPELINE_LEN, + .cur_ltable = sblf->table_id, + }; + uint64_t stub[1024 / 8]; +@@ -1006,6 +1047,8 @@ read_gen_opts(void) + + hmap_init(&nd_ra_opts); + nd_ra_opts_init(&nd_ra_opts); ++ ++ controller_event_opts_init(&event_opts); + } + + static void +@@ -1049,6 +1092,30 @@ read_mac_bindings(void) + } + } + ++static void ++read_fdbs(void) ++{ ++ const struct sbrec_fdb *fdb; ++ SBREC_FDB_FOR_EACH (fdb, ovnsb_idl) { ++ struct eth_addr mac; ++ if (!eth_addr_from_string(fdb->mac, &mac)) { ++ VLOG_WARN("%s: bad Ethernet address", fdb->mac); ++ continue; ++ } ++ ++ struct ovntrace_datapath *dp = ++ ovntrace_datapath_find_by_key(fdb->dp_key); ++ if (!dp) { ++ continue; ++ } ++ ++ struct ovntrace_fdb *fdb_t = xmalloc(sizeof *fdb_t); ++ fdb_t->mac = mac; ++ fdb_t->port_key = fdb->port_key; ++ hmap_insert(&dp->fdbs, &fdb_t->node, hash_fdb(&mac)); ++ } ++} ++ + static void + read_db(void) + { +@@ -1060,6 +1127,7 @@ read_db(void) + read_gen_opts(); + read_flows(); + read_mac_bindings(); ++ read_fdbs(); + } + + static const struct ovntrace_port * +@@ -1116,6 +1184,11 @@ ovntrace_lookup_port(const void *dp_, const char *port_name, + return true; + } + ++ if (!strcmp(port_name, "none")) { ++ *portp = 0; ++ return true; ++ } ++ + const struct ovntrace_port *port = ovntrace_port_lookup_by_name(port_name); + if (port) { + if (port->dp == dp) { +@@ -1802,6 +1875,91 @@ execute_tcp_reset(const struct ovnact_nest *on, + execute_tcp6_reset(on, dp, uflow, table_id, loopback, pipeline, super); + } + } ++ ++static void ++execute_sctp4_abort(const struct ovnact_nest *on, ++ const struct ovntrace_datapath *dp, ++ const struct flow *uflow, uint8_t table_id, ++ bool loopback, enum ovnact_pipeline pipeline, ++ struct ovs_list *super) ++{ ++ struct flow sctp_flow = *uflow; ++ ++ /* Update fields for TCP SCTP. */ ++ if (loopback) { ++ sctp_flow.dl_dst = uflow->dl_src; ++ sctp_flow.dl_src = uflow->dl_dst; ++ sctp_flow.nw_dst = uflow->nw_src; ++ sctp_flow.nw_src = uflow->nw_dst; ++ } else { ++ sctp_flow.dl_dst = uflow->dl_dst; ++ sctp_flow.dl_src = uflow->dl_src; ++ sctp_flow.nw_dst = uflow->nw_dst; ++ sctp_flow.nw_src = uflow->nw_src; ++ } ++ sctp_flow.nw_proto = IPPROTO_SCTP; ++ sctp_flow.nw_ttl = 255; ++ sctp_flow.tp_src = uflow->tp_src; ++ sctp_flow.tp_dst = uflow->tp_dst; ++ ++ struct ovntrace_node *node = ovntrace_node_append( ++ super, OVNTRACE_NODE_TRANSFORMATION, "sctp_abort"); ++ ++ trace_actions(on->nested, on->nested_len, dp, &sctp_flow, ++ table_id, pipeline, &node->subs); ++} ++ ++static void ++execute_sctp6_abort(const struct ovnact_nest *on, ++ const struct ovntrace_datapath *dp, ++ const struct flow *uflow, uint8_t table_id, ++ bool loopback, enum ovnact_pipeline pipeline, ++ struct ovs_list *super) ++{ ++ struct flow sctp_flow = *uflow; ++ ++ /* Update fields for SCTP. 
*/ ++ if (loopback) { ++ sctp_flow.dl_dst = uflow->dl_src; ++ sctp_flow.dl_src = uflow->dl_dst; ++ sctp_flow.ipv6_dst = uflow->ipv6_src; ++ sctp_flow.ipv6_src = uflow->ipv6_dst; ++ } else { ++ sctp_flow.dl_dst = uflow->dl_dst; ++ sctp_flow.dl_src = uflow->dl_src; ++ sctp_flow.ipv6_dst = uflow->ipv6_dst; ++ sctp_flow.ipv6_src = uflow->ipv6_src; ++ } ++ sctp_flow.nw_proto = IPPROTO_TCP; ++ sctp_flow.nw_ttl = 255; ++ sctp_flow.tp_src = uflow->tp_src; ++ sctp_flow.tp_dst = uflow->tp_dst; ++ sctp_flow.tcp_flags = htons(TCP_RST); ++ ++ struct ovntrace_node *node = ovntrace_node_append( ++ super, OVNTRACE_NODE_TRANSFORMATION, "sctp_abort"); ++ ++ trace_actions(on->nested, on->nested_len, dp, &sctp_flow, ++ table_id, pipeline, &node->subs); ++} ++ ++static void ++execute_sctp_abort(const struct ovnact_nest *on, ++ const struct ovntrace_datapath *dp, ++ const struct flow *uflow, uint8_t table_id, ++ bool loopback, enum ovnact_pipeline pipeline, ++ struct ovs_list *super) ++{ ++ if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) { ++ execute_sctp4_abort(on, dp, uflow, table_id, loopback, ++ pipeline, super); ++ } else { ++ execute_sctp6_abort(on, dp, uflow, table_id, loopback, ++ pipeline, super); ++ } ++} ++ ++ + static void + execute_reject(const struct ovnact_nest *on, + const struct ovntrace_datapath *dp, +@@ -1810,6 +1968,8 @@ execute_reject(const struct ovnact_nest *on, + { + if (uflow->nw_proto == IPPROTO_TCP) { + execute_tcp_reset(on, dp, uflow, table_id, true, pipeline, super); ++ } else if (uflow->nw_proto == IPPROTO_SCTP) { ++ execute_sctp_abort(on, dp, uflow, table_id, true, pipeline, super); + } else { + if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) { + execute_icmp4(on, dp, uflow, table_id, true, pipeline, super); +@@ -1938,6 +2098,66 @@ execute_lookup_mac_bind_ip(const struct ovnact_lookup_mac_bind_ip *bind, + mf_write_subfield_flow(&dst, &sv, uflow); + } + ++static void ++execute_lookup_fdb(const struct ovnact_lookup_fdb *lookup_fdb, ++ const struct ovntrace_datapath *dp, ++ struct flow *uflow, ++ struct ovs_list *super) ++{ ++ /* Get logical port number.*/ ++ struct mf_subfield port_sf = expr_resolve_field(&lookup_fdb->port); ++ ovs_assert(port_sf.n_bits == 32); ++ uint32_t port_key = mf_get_subfield(&port_sf, uflow); ++ ++ /* Get MAC. */ ++ struct mf_subfield mac_sf = expr_resolve_field(&lookup_fdb->mac); ++ ovs_assert(mac_sf.n_bits == 48); ++ union mf_subvalue mac_sv; ++ mf_read_subfield(&mac_sf, uflow, &mac_sv); ++ ++ const struct ovntrace_fdb *fdb_t ++ = ovntrace_fdb_find(dp, &mac_sv.mac); ++ ++ struct mf_subfield dst = expr_resolve_field(&lookup_fdb->dst); ++ uint8_t val = 0; ++ ++ if (fdb_t && fdb_t->port_key == port_key) { ++ val = 1; ++ ovntrace_node_append(super, OVNTRACE_NODE_ACTION, ++ "/* MAC lookup for "ETH_ADDR_FMT" found in " ++ "FDB. */", ETH_ADDR_ARGS(uflow->dl_dst)); ++ } else { ++ ovntrace_node_append(super, OVNTRACE_NODE_ACTION, ++ "/* lookup mac failed in mac learning table. */"); ++ } ++ union mf_subvalue sv = { .u8_val = val }; ++ mf_write_subfield_flow(&dst, &sv, uflow); ++} ++ ++static void ++execute_get_fdb(const struct ovnact_get_fdb *get_fdb, ++ const struct ovntrace_datapath *dp, ++ struct flow *uflow) ++{ ++ /* Get MAC. 
*/ ++ struct mf_subfield mac_sf = expr_resolve_field(&get_fdb->mac); ++ ovs_assert(mac_sf.n_bits == 48); ++ union mf_subvalue mac_sv; ++ mf_read_subfield(&mac_sf, uflow, &mac_sv); ++ ++ const struct ovntrace_fdb *fdb_t ++ = ovntrace_fdb_find(dp, &mac_sv.mac); ++ ++ struct mf_subfield dst = expr_resolve_field(&get_fdb->dst); ++ uint32_t val = 0; ++ if (fdb_t) { ++ val = fdb_t->port_key; ++ } ++ ++ union mf_subvalue sv = { .be32_int = htonl(val) }; ++ mf_write_subfield_flow(&dst, &sv, uflow); ++} ++ + static void + execute_put_opts(const struct ovnact_put_opts *po, + const char *name, struct flow *uflow, +@@ -2503,6 +2723,11 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, + false, pipeline, super); + break; + ++ case OVNACT_SCTP_ABORT: ++ execute_sctp_abort(ovnact_get_SCTP_ABORT(a), dp, uflow, table_id, ++ false, pipeline, super); ++ break; ++ + case OVNACT_OVNFIELD_LOAD: + execute_ovnfield_load(ovnact_get_OVNFIELD_LOAD(a), super); + break; +@@ -2540,6 +2765,20 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, + break; + case OVNACT_DHCP6_REPLY: + break; ++ case OVNACT_BFD_MSG: ++ break; ++ ++ case OVNACT_PUT_FDB: ++ /* Nothing to do for tracing. */ ++ break; ++ ++ case OVNACT_GET_FDB: ++ execute_get_fdb(ovnact_get_GET_FDB(a), dp, uflow); ++ break; ++ ++ case OVNACT_LOOKUP_FDB: ++ execute_lookup_fdb(ovnact_get_LOOKUP_FDB(a), dp, uflow, super); ++ break; + } + } + ds_destroy(&s); diff --git a/SPECS/ovn2.13.spec b/SPECS/ovn2.13.spec index 105c0d4..1b900e5 100644 --- a/SPECS/ovn2.13.spec +++ b/SPECS/ovn2.13.spec @@ -1,6 +1,4 @@ -# Spec file for Open Virtual Network (OVN). - -# Copyright (C) 2020 Red Hat, Inc. +# Copyright (C) 2009, 2010, 2013, 2014 Nicira Networks, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright @@ -9,22 +7,12 @@ # # If tests have to be skipped while building, specify the '--without check' # option. For example: -# rpmbuild -bb --without check rhel/ovn-fedora.spec -# +# rpmbuild -bb --without check rhel/openvswitch-fedora.spec # This defines the base package name's version. 
-%define pkgver 2.13 -%define pkgname ovn%{pkgver} - -%define upstreamver 20.12 -#%%global commit0 7886ac9ed807d6ff942edde624a3f9331da7332a -#%%global date 20200217 -#%%global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) - -# openvswitch commit -#%%global commit1 8ae6a5f98c3ad57d10220596054f6a0c4d6ea358 -#%%global shortcommit1 %(c=%{commit1}; echo ${c:0:7}) +%define pkgver 2.13 +%define pkgname ovn2.13 # If libcap-ng isn't available and there is no need for running OVS # as regular user, specify the '--without libcapng' @@ -33,12 +21,15 @@ # Enable PIE, bz#955181 %global _hardened_build 1 -# some distros (e.g: RHEL-7) don't define _rundir macro yet +# RHEL-7 doesn't define _rundir macro yet # Fedora 15 onwards uses /run as _rundir %if 0%{!?_rundir:1} %define _rundir /run %endif +# Build python2 (that provides python) and python3 subpackages on Fedora +# Build only python3 (that provides python) subpackage on RHEL8 +# Build only python subpackage on RHEL7 %if 0%{?rhel} > 7 || 0%{?fedora} # On RHEL8 Sphinx is included in buildroot %global external_sphinx 1 @@ -58,9 +49,9 @@ Name: %{pkgname} Summary: Open Virtual Network support Group: System Environment/Daemons -URL: http://www.openvswitch.org/ -Version: %{upstreamver}.0 -Release: 17%{?commit0:.%{date}git%{shortcommit0}}%{?dist} +URL: http://www.ovn.org/ +Version: 20.12.0 +Release: 85%{?commit0:.%{date}git%{shortcommit0}}%{?dist} Provides: openvswitch%{pkgver}-ovn-common = %{?epoch:%{epoch}:}%{version}-%{release} Obsoletes: openvswitch%{pkgver}-ovn-common < 2.11.0-1 @@ -68,26 +59,14 @@ Obsoletes: openvswitch%{pkgver}-ovn-common < 2.11.0-1 # lib/sflow*.[ch] files are SISSL License: ASL 2.0 and LGPLv2+ and SISSL -%if 0%{?commit0:1} -Source: https://github.com/ovn-org/ovn/archive/%{commit0}.tar.gz#/ovn-%{shortcommit0}.tar.gz -%else -# Upstream version is called 20.03, not 2.13. Once we switch to using the -# same versioning scheme for RH, we can reference %{version} here. -# XXX Are OVN releases listed on openvswitch.org? -Source: https://www.openvswitch.org/releases/ovn-%{version}.tar.gz -%endif - +# Always pull an upstream release, since this is what we rebase to. 
+Source: https://github.com/ovn-org/ovn/archive/v%{version}.tar.gz#/ovn-%{version}.tar.gz -#%define ovsver %{pkgver}.0 -%define ovsver 2.14.90 +%define ovscommit ac09cbfcb70ac6f443f039d5934448bd80f74493 +%define ovsshortcommit ac09cbf -%if 0%{?commit1:1} -Source10: https://github.com/openvswitch/ovs/archive/%{commit1}.tar.gz#/openvswitch-%{shortcommit1}.tar.gz -%define ovsdir ovs-%{commit1} -%else -Source10: https://openvswitch.org/releases/openvswitch-%{ovsver}.tar.gz -%define ovsdir openvswitch-%{ovsver} -%endif +Source10: https://github.com/openvswitch/ovs/archive/%{ovscommit}.tar.gz#/openvswitch-%{ovsshortcommit}.tar.gz +%define ovsdir ovs-%{ovscommit} %define docutilsver 0.12 %define pygmentsver 1.4 @@ -106,96 +85,7 @@ Source504: arm64-armv8a-linuxapp-gcc-config Source505: ppc_64-power8-linuxapp-gcc-config Source506: x86_64-native-linuxapp-gcc-config -# ovn-patches -# OVN backports (0 - 799) - -# Bug 1883957 -# Bug 1881826 -Patch01: 0001-northd-add-reject-action-for-lb-with-no-backends.patch -Patch02: 0002-nbctl-Cache-to-which-switch-or-router-particular-por.patch -Patch03: 0003-nbctl-Use-partial-set-updates-instead-of-re-setting-.patch -Patch04: 0004-nbctl-Remove-column-verification-for-partial-updates.patch -Patch05: 0005-northd-Add-ECMP-support-to-router-policies.patch -Patch06: 0006-osx-Fix-compilation-error.patch -Patch07: 0007-tests-Make-ovn-ovn-controller-incremental-processing.patch - -# Bug 1909650 -Patch10: 0001-ovn-trace-fix-trigger_event-warning.patch - -# Bug 1914304 -Patch20: 0001-binding-Do-not-clear-container-lbinding-pb-when-pare.patch - -# Bug 1847570 -Patch30: 0001-ovn-northd-Move-lswitch-ARP-ND-Responder-to-function.patch -Patch31: 0002-ovn-northd-Move-DHCP-Options-and-Response-to-a-funct.patch -Patch32: 0003-ovn-northd-Move-lswitch-DNS-lookup-and-response-to-a.patch -Patch33: 0004-ovn-northd-Move-DNS-and-DHCP-defaults-to-a-function.patch -Patch34: 0005-ovn-northd-Move-ARP-response-for-external-ports-to-a.patch -Patch35: 0006-ovn-northd-Move-broadcast-and-multicast-lookup-in-ls.patch -Patch36: 0007-ovn-northd-Move-destination-handling-into-functions.patch -Patch37: 0008-ovn-northd-split-build_lswitch_output_port_sec-into-.patch -Patch38: 0009-ovn-northd-Move-lrouter-arp-and-nd-datapath-processi.patch -Patch39: 0010-ovn-northd-Move-ipv4-input-to-a-function.patch -Patch40: 0011-ovn-northd-move-NAT-Defrag-and-lb-to-a-function.patch -Patch41: 0012-controller-introduce-BFD-tx-path-in-ovn-controller.patch -Patch42: 0013-action-introduce-handle_bfd_msg-action.patch -Patch43: 0014-controller-bfd-introduce-BFD-state-machine.patch -Patch44: 0015-bfd-support-demand-mode-on-rx-side.patch -Patch45: 0016-ovn-integrate-bfd-for-static-routes.patch - -# Bug 1915739 -Patch50: 0001-bfd-introduce-IPv6-support.patch - -# Bug 1918004 -Patch60: 0001-northd-Fix-ACL-fair-log-meters-for-Port_Group-ACLs.patch - -# Bug 1917533 -Patch70: 0001-binding-Fix-container-port-removal-from-local-bindin.patch -Patch71: 0002-binding-Always-delete-child-port-bindings-first.patch - -# Bug 1918582 -# Bug 1919055 -Patch80: 0001-northd-Fix-duplicate-logical-port-detection.patch - -# Bug 1919812 -Patch90: 0001-ovn-controller-Fix-wrong-conj_id-match-flows-when-ca.patch - -# Bug 1917979 -Patch100: 0001-ovn-ctl-Add-support-for-ovsdb-server-disable-file-co.patch - -# Bug 1857106 -Patch110: 0001-controller-fix-pkt_marking-with-IP-buffering.patch - -# Bug 1915958 -Patch120: 0001-ovn-nbctl-add-bfd-report-to-lr-route-list-command.patch -Patch121: 0002-ovn-nbctl-add-ecmp-ecmp-symmetric-reply-to-lr-route-.patch - -# 
Bug 1918422 -Patch130: 0001-northd-add-event-option-to-enable-controller_event-f.patch - -# Bug 1839102 -Patch140: 0001-ofctrl-Rename-nb_cfg-to-req_cfg.patch -Patch141: 0002-controller-Implement-a-generic-barrier-based-on-ofct.patch -Patch142: 0003-binding-Set-Logical_Switch_Port.up-when-all-OVS-flow.patch - -# Bug 1918997 -Patch150: 0001-ovn-nbctl-add-bfd-option-to-lr-route-add.patch - -# Bug 1926165 -Patch160: 0001-binding-Correctly-set-Port_Binding.up-for-container-.patch -Patch161: 0002-binding-Set-Port_Binding.up-only-if-supported.patch -Patch162: 0003-northd-Allow-backwards-compatibility-for-Logical_Swi.patch -Patch163: 0004-tests-Fix-Port_Binding-up-test.patch - -# Bug 1908540 -# Bug 1917875 -Patch170: 0001-Support-configuring-Load-Balancer-hairpin-source-IP.patch -Patch171: 0002-lflow-Use-learn-action-to-generate-LB-hairpin-reply-.patch - -# Bug 1927230 -Patch180: 0001-northd-Skip-matching-on-ct-flags-for-stateless-confi.patch - -# OpenvSwitch backports (800-) if required. +Patch: ovn-%{version}.patch # FIXME Sphinx is used to generate some manpages, unfortunately, on RHEL, it's # in the -optional repository and so we can't require it directly since RHV @@ -636,622 +526,331 @@ fi %{_unitdir}/ovn-controller-vtep.service %changelog -* Wed Feb 10 2021 Mark Michelson