From ac218b6e00ca97ddb54362d71435c4ea2d47cfb7 Mon Sep 17 00:00:00 2001
From: Numan Siddique <numans@ovn.org>
Date: Tue, 4 Aug 2020 12:49:34 +0530
Subject: [PATCH 17/22] ovn-northd: Don't send the pkt to conntrack if it is to
be routed in egress stage.
If there is a logical port 'P1' with the IP - 10.0.0.3 and a logical port 'P2' with
the IP 20.0.0.3 and if the logical switch of 'P1' has atleast one load balancer
associated with it and atleast one ACL with allow-related action associated with it.
Then for every packet from 'P1' to 'P2' after the TCP connection
is established we see a total of 4 recirculations in the datapath on the chassis
claiming 'P1'. This is because,
In the ingress logical switch pipeline, below logical flows are hit
- table=9 (ls_in_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
- table=10(ls_in_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;)
And in the egress logical switch pipeline, below logical flows are hit
- table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[0] = 1; next;)
- table=2 (ls_out_pre_stateful), priority=100 , match=(reg0[0] == 1), action=(ct_next;)
- table=3 (ls_out_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
- table=7 (ls_out_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;)
In the above example, when the packet enters the egress pipeline and since it needs to
enter the router pipeline, we can skip setting reg0[0] if outport is peer port of
logical router port. There is no need to send the packet to conntrack in this case.
This patch handles this case for router ports. Next patch in the series avoids sending to
conntrack with the action - ct_lb if the packet is not destined to the LB VIP.
With the present master for the above example, we see total of 4 recirculations on the
chassis claiming the lport 'P1'. With this patch we see only 2 recirculations.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
---
northd/ovn-northd.8.xml | 33 ++++++++++++++++++++++++++++++++-
northd/ovn-northd.c | 39 ++++++++++++++++++++++++++++++---------
2 files changed, 62 insertions(+), 10 deletions(-)
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index f35a035fd..e45d494e8 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -338,6 +338,15 @@
db="OVN_Northbound"/> table.
</p>
+ <p>
+ This table also has a priority-110 flow with the match
+ <code>inport == <var>I</var></code> for all logical switch
+ datapaths to move traffic to the next table. Where <var>I</var>
+ is the peer of a logical router port. This flow is added to
+ skip the connection tracking of packets which enter from
+ logical router datapath to logical switch datapath.
+ </p>
+
<h3>Ingress Table 5: Pre-stateful</h3>
<p>
@@ -505,7 +514,20 @@
<p>
It contains a priority-0 flow that simply moves traffic to the next
- table. For established connections a priority 100 flow matches on
+ table.
+ </p>
+
+ <p>
+ A priority-65535 flow with the match
+ <code>inport == <var>I</var></code> for all logical switch
+ datapaths to move traffic to the next table. Where <var>I</var>
+ is the peer of a logical router port. This flow is added to
+ skip the connection tracking of packets which enter from
+ logical router datapath to logical switch datapath.
+ </p>
+
+ <p>
+ For established connections a priority 65534 flow matches on
<code>ct.est && !ct.rel && !ct.new &&
!ct.inv</code> and sets an action <code>reg0[2] = 1; next;</code> to act
as a hint for table <code>Stateful</code> to send packets through
@@ -1342,6 +1364,15 @@ output;
db="OVN_Northbound"/> table.
</p>
+ <p>
+ This table also has a priority-110 flow with the match
+ <code>outport == <var>I</var></code> for all logical switch
+ datapaths to move traffic to the next table. Where <var>I</var>
+ is the peer of a logical router port. This flow is added to
+ skip the connection tracking of packets which will be entering
+ logical router datapath from logical switch datapath for routing.
+ </p>
+
<h3>Egress Table 2: Pre-stateful</h3>
<p>
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f5433d9d..7b534ce3c 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4850,8 +4850,9 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
}
static void
-build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
- struct hmap *lflows)
+skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
+ enum ovn_stage in_stage, enum ovn_stage out_stage,
+ uint16_t priority, struct hmap *lflows)
{
/* Can't use ct() for router ports. Consider the following configuration:
* lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
@@ -4867,10 +4868,10 @@ build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
ds_put_format(&match_in, "ip && inport == %s", op->json_key);
ds_put_format(&match_out, "ip && outport == %s", op->json_key);
- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
+ ovn_lflow_add_with_hint(lflows, od, in_stage, priority,
ds_cstr(&match_in), "next;",
&op->nbsp->header_);
- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
+ ovn_lflow_add_with_hint(lflows, od, out_stage, priority,
ds_cstr(&match_out), "next;",
&op->nbsp->header_);
@@ -4903,10 +4904,14 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
* defragmentation, in order to match L4 headers. */
if (has_stateful) {
for (size_t i = 0; i < od->n_router_ports; i++) {
- build_pre_acl_flows(od, od->router_ports[i], lflows);
+ skip_port_from_conntrack(od, od->router_ports[i],
+ S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
+ 110, lflows);
}
for (size_t i = 0; i < od->n_localnet_ports; i++) {
- build_pre_acl_flows(od, od->localnet_ports[i], lflows);
+ skip_port_from_conntrack(od, od->localnet_ports[i],
+ S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
+ 110, lflows);
}
/* Ingress and Egress Pre-ACL Table (Priority 110).
@@ -5050,6 +5055,17 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
+ for (size_t i = 0; i < od->n_router_ports; i++) {
+ skip_port_from_conntrack(od, od->router_ports[i],
+ S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
+ 110, lflows);
+ }
+ for (size_t i = 0; i < od->n_localnet_ports; i++) {
+ skip_port_from_conntrack(od, od->localnet_ports[i],
+ S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
+ 110, lflows);
+ }
+
struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
bool vip_configured = false;
@@ -5725,13 +5741,18 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows)
ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
if (od->nbs->load_balancer) {
- /* Ingress and Egress LB Table (Priority 65535).
+ for (size_t i = 0; i < od->n_router_ports; i++) {
+ skip_port_from_conntrack(od, od->router_ports[i],
+ S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
+ UINT16_MAX, lflows);
+ }
+ /* Ingress and Egress LB Table (Priority 65534).
*
* Send established traffic through conntrack for just NAT. */
- ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1,
"ct.est && !ct.rel && !ct.new && !ct.inv",
REGBIT_CONNTRACK_NAT" = 1; next;");
- ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1,
"ct.est && !ct.rel && !ct.new && !ct.inv",
REGBIT_CONNTRACK_NAT" = 1; next;");
}
--
2.26.2