diff --git a/.ovn2.13.metadata b/.ovn2.13.metadata index 543429b..e30eba6 100644 --- a/.ovn2.13.metadata +++ b/.ovn2.13.metadata @@ -1,5 +1,5 @@ 002450621b33c5690060345b0aac25bc2426d675 SOURCES/docutils-0.12.tar.gz b5734e2bdf09d15d8950b3423dbecec9825d129a SOURCES/openvswitch-2.13.0.tar.gz -3fac0d814f52e9744195042bb9a5e93561eec9bc SOURCES/ovn-2.13.0.tar.gz +e2761d2e4a5fe7d0fc9f9900dda176ba1a2fc049 SOURCES/ovn-20.06.2.tar.gz d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz 6beb30f18ffac3de7689b7fd63e9a8a7d9c8df3a SOURCES/Sphinx-1.1.3.tar.gz diff --git a/SOURCES/0001-Add-SCTP-support-to-load-balancers.patch b/SOURCES/0001-Add-SCTP-support-to-load-balancers.patch deleted file mode 100644 index 2c966fa..0000000 --- a/SOURCES/0001-Add-SCTP-support-to-load-balancers.patch +++ /dev/null @@ -1,364 +0,0 @@ -From 080b77af805d1c48f48c617c1fab095edcebaffd Mon Sep 17 00:00:00 2001 -From: Mark Michelson -Date: Mon, 9 Mar 2020 17:09:15 -0400 -Subject: [PATCH] Add SCTP support to load balancers. - -This allows for load balancers to use SCTP as a supported protocol in -addition to the already-supported UDP and TCP. - -With this patch, health checks are not supported for SCTP load -balancers. A test has been added to ensure that this is the case. Health -checks should be added for SCTP load balancers in the near future. When -that's done, the existing test can be updated to ensure that the SCTP -health check works properly. 
- -Signed-off-by: Mark Michelson -Acked-by: Numan Siddique -(cherry picked from upstream commit c4700eed17da8615107553aec82852a37d401821) - -Change-Id: Iecbc4a2329716aaa0f37a5f53acd5b5cfa74d133 ---- - lib/actions.c | 8 ++-- - northd/ovn-northd.c | 55 ++++++++++++++--------- - ovn-nb.ovsschema | 6 +-- - ovn-nb.xml | 10 ++--- - tests/ovn.at | 121 +++++++++++++++++++++++++++++++++++++++++++++++++- - utilities/ovn-nbctl.c | 8 ++-- - 6 files changed, 170 insertions(+), 38 deletions(-) - -diff --git a/lib/actions.c b/lib/actions.c -index f22acdd..6351db7 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -1957,10 +1957,12 @@ validate_empty_lb_backends(struct action_context *ctx, - } - break; - case EMPTY_LB_PROTOCOL: -- if (strcmp(c->string, "tcp") && strcmp(c->string, "udp")) { -+ if (strcmp(c->string, "tcp") && -+ strcmp(c->string, "udp") && -+ strcmp(c->string, "sctp")) { - lexer_error(ctx->lexer, -- "Load balancer protocol '%s' is not 'tcp' or 'udp'", -- c->string); -+ "Load balancer protocol '%s' is not 'tcp', 'udp', " -+ "or 'sctp'", c->string); - return; - } - break; -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 75c19df..bb68b8f 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -3173,10 +3173,21 @@ ovn_lb_create(struct northd_context *ctx, struct hmap *lbs, - lb->vips[n_vips].backend_ips = xstrdup(node->value); - - struct nbrec_load_balancer_health_check *lb_health_check = NULL; -- for (size_t i = 0; i < nbrec_lb->n_health_check; i++) { -- if (!strcmp(nbrec_lb->health_check[i]->vip, node->key)) { -- lb_health_check = nbrec_lb->health_check[i]; -- break; -+ if (nbrec_lb->protocol && !strcmp(nbrec_lb->protocol, "sctp")) { -+ if (nbrec_lb->n_health_check > 0) { -+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); -+ VLOG_WARN_RL(&rl, -+ "SCTP load balancers do not currently support " -+ "health checks. 
Not creating health checks for " -+ "load balancer " UUID_FMT, -+ UUID_ARGS(&nbrec_lb->header_.uuid)); -+ } -+ } else { -+ for (size_t i = 0; i < nbrec_lb->n_health_check; i++) { -+ if (!strcmp(nbrec_lb->health_check[i]->vip, node->key)) { -+ lb_health_check = nbrec_lb->health_check[i]; -+ break; -+ } - } - } - -@@ -5558,10 +5569,13 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb *lb) - - const char *proto = NULL; - if (lb_vip->vip_port) { -- if (lb->nlb->protocol && !strcmp(lb->nlb->protocol, "udp")) { -- proto = "udp"; -- } else { -- proto = "tcp"; -+ proto = "tcp"; -+ if (lb->nlb->protocol) { -+ if (!strcmp(lb->nlb->protocol, "udp")) { -+ proto = "udp"; -+ } else if (!strcmp(lb->nlb->protocol, "sctp")) { -+ proto = "sctp"; -+ } - } - } - -@@ -7569,7 +7583,7 @@ static void - add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, - struct ds *match, struct ds *actions, int priority, - const char *lb_force_snat_ip, struct lb_vip *lb_vip, -- bool is_udp, struct nbrec_load_balancer *lb, -+ const char *proto, struct nbrec_load_balancer *lb, - struct shash *meter_groups, struct sset *nat_entries) - { - build_empty_lb_event_flow(od, lflows, lb_vip, lb, S_ROUTER_IN_DNAT, -@@ -7624,11 +7638,10 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, - * S_ROUTER_IN_DNAT stage. */ - struct ds unsnat_match = DS_EMPTY_INITIALIZER; - ds_put_format(&unsnat_match, "%s && %s.dst == %s && %s", -- ip_match, ip_match, lb_vip->vip, -- is_udp ? "udp" : "tcp"); -+ ip_match, ip_match, lb_vip->vip, proto); - if (lb_vip->vip_port) { -- ds_put_format(&unsnat_match, " && %s.dst == %d", -- is_udp ? 
"udp" : "tcp", lb_vip->vip_port); -+ ds_put_format(&unsnat_match, " && %s.dst == %d", proto, -+ lb_vip->vip_port); - } - - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, 120, -@@ -7654,7 +7667,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, - - if (backend->port) { - ds_put_format(&undnat_match, " && %s.src == %d) || ", -- is_udp ? "udp" : "tcp", backend->port); -+ proto, backend->port); - } else { - ds_put_cstr(&undnat_match, ") || "); - } -@@ -9203,15 +9216,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - - int prio = 110; - bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp"); -+ bool is_sctp = nullable_string_is_equal(nb_lb->protocol, -+ "sctp"); -+ const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp"; - - if (lb_vip->vip_port) { -- if (is_udp) { -- ds_put_format(&match, " && udp && udp.dst == %d", -- lb_vip->vip_port); -- } else { -- ds_put_format(&match, " && tcp && tcp.dst == %d", -- lb_vip->vip_port); -- } -+ ds_put_format(&match, " && %s && %s.dst == %d", proto, -+ proto, lb_vip->vip_port); - prio = 120; - } - -@@ -9220,7 +9231,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - od->l3redirect_port->json_key); - } - add_router_lb_flow(lflows, od, &match, &actions, prio, -- lb_force_snat_ip, lb_vip, is_udp, -+ lb_force_snat_ip, lb_vip, proto, - nb_lb, meter_groups, &nat_entries); - } - } -diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema -index 843e979..ea6f4e3 100644 ---- a/ovn-nb.ovsschema -+++ b/ovn-nb.ovsschema -@@ -1,7 +1,7 @@ - { - "name": "OVN_Northbound", -- "version": "5.20.0", -- "cksum": "2846067333 25243", -+ "version": "5.20.1", -+ "cksum": "721375950 25251", - "tables": { - "NB_Global": { - "columns": { -@@ -168,7 +168,7 @@ - "min": 0, "max": "unlimited"}}, - "protocol": { - "type": {"key": {"type": "string", -- "enum": ["set", ["tcp", "udp"]]}, -+ "enum": ["set", ["tcp", "udp", "sctp"]]}, - "min": 0, "max": 1}}, - "health_check": {"type": { - "key": 
{"type": "uuid", -diff --git a/ovn-nb.xml b/ovn-nb.xml -index 4a422bb..f7ba9c3 100644 ---- a/ovn-nb.xml -+++ b/ovn-nb.xml -@@ -1458,11 +1458,11 @@ - - -

-- Valid protocols are tcp or udp. This column -- is useful when a port number is provided as part of the -- vips column. If this column is empty and a port number -- is provided as part of vips column, OVN assumes the -- protocol to be tcp. -+ Valid protocols are tcp, udp, or -+ sctp. This column is useful when a port number is -+ provided as part of the vips column. If this column is -+ empty and a port number is provided as part of vips -+ column, OVN assumes the protocol to be tcp. -

-
- -diff --git a/tests/ovn.at b/tests/ovn.at -index 1402fae..9f3d9d3 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -1423,8 +1423,8 @@ trigger_event(event = "empty_lb_backends", meter="event-elb" vip = "10.0.0.1:80" - encodes as controller(userdata=00.00.00.0f.00.00.00.00.00.00.00.00.00.01.00.0b.31.30.2e.30.2e.30.2e.31.3a.38.30.00.02.00.03.74.63.70.00.03.00.24.31.32.33.34.35.36.37.38.2d.61.62.63.64.2d.39.38.37.36.2d.66.65.64.63.2d.31.31.31.31.39.66.38.65.37.64.36.63,meter_id=5) - - # Testing invalid vip results in extra error messages from socket-util.c --trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "sctp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c"); -- Load balancer protocol 'sctp' is not 'tcp' or 'udp' -+trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "aarp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c"); -+ Load balancer protocol 'aarp' is not 'tcp', 'udp', or 'sctp' - trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "bacon"); - Load balancer 'bacon' is not a UUID - -@@ -17894,6 +17894,123 @@ AT_CHECK([cat lflows.txt], [0], [dnl - OVN_CLEANUP([hv1], [hv2]) - AT_CLEANUP - -+AT_SETUP([ovn -- SCTP Load balancer health checks]) -+AT_KEYWORDS([lb sctp]) -+ -+# Currently this test just ensures that no service monitors get created when -+# An SCTP load balancer is configured to use health checks. Once SCTP load -+# balancers are modified to allow health checks, this test should be altered -+# to ensure the health check succeeds. -+ -+ovn_start -+ -+# Set up same network as previous health check test. As long as health checks -+# aren't allowed for SCTP load balancers, the network will not be used for -+# much. However, having the network in place will make it easy to alter when -+# health checks are allowed. 
-+ -+net_add n1 -+ -+sim_add hv1 -+as hv1 -+ovs-vsctl add-br br-phys -+ovn_attach n1 br-phys 192.168.0.1 -+ovs-vsctl -- add-port br-int hv1-vif1 -- \ -+ set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ -+ options:tx_pcap=hv1/vif1-tx.pcap \ -+ options:rxq_pcap=hv1/vif1-rx.pcap \ -+ ofport-request=1 -+ovs-vsctl -- add-port br-int hv1-vif2 -- \ -+ set interface hv1-vif2 external-ids:iface-id=sw0-p2 \ -+ options:tx_pcap=hv1/vif2-tx.pcap \ -+ options:rxq_pcap=hv1/vif2-rx.pcap \ -+ ofport-request=2 -+ -+sim_add hv2 -+as hv2 -+ovs-vsctl add-br br-phys -+ovn_attach n1 br-phys 192.168.0.2 -+ovs-vsctl -- add-port br-int hv2-vif1 -- \ -+ set interface hv2-vif1 external-ids:iface-id=sw1-p1 \ -+ options:tx_pcap=hv2/vif1-tx.pcap \ -+ options:rxq_pcap=hv2/vif1-rx.pcap \ -+ ofport-request=1 -+ -+ovn-nbctl ls-add sw0 -+ -+ovn-nbctl lsp-add sw0 sw0-p1 -+ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" -+ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3" -+ -+ovn-nbctl lsp-add sw0 sw0-p2 -+ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" -+ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4" -+ -+# Create the second logical switch with one port -+ovn-nbctl ls-add sw1 -+ovn-nbctl lsp-add sw1 sw1-p1 -+ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" -+ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" -+ -+# Create a logical router and attach both logical switches -+ovn-nbctl lr-add lr0 -+ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 -+ovn-nbctl lsp-add sw0 sw0-lr0 -+ovn-nbctl lsp-set-type sw0-lr0 router -+ovn-nbctl lsp-set-addresses sw0-lr0 router -+ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 -+ -+ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 -+ovn-nbctl lsp-add sw1 sw1-lr0 -+ovn-nbctl lsp-set-type sw1-lr0 router -+ovn-nbctl lsp-set-addresses sw1-lr0 router -+ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 -+ -+ovn-nbctl lb-add lb1 10.0.0.10:80 
10.0.0.3:80,20.0.0.3:80 sctp -+ -+ovn-nbctl --wait=sb set load_balancer . ip_port_mappings:10.0.0.3=sw0-p1:10.0.0.2 -+ovn-nbctl --wait=sb set load_balancer . ip_port_mappings:20.0.0.3=sw1-p1:10.0.0.2 -+ -+ovn-nbctl --wait=sb -- --id=@hc create \ -+Load_Balancer_Health_Check vip="10.0.0.10\:80" -- add Load_Balancer . \ -+health_check @hc -+ -+ovn-nbctl --wait=sb ls-lb-add sw0 lb1 -+ovn-nbctl --wait=sb ls-lb-add sw1 lb1 -+ovn-nbctl --wait=sb lr-lb-add lr0 lb1 -+ -+ovn-nbctl ls-add public -+ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 -+ovn-nbctl lsp-add public public-lr0 -+ovn-nbctl lsp-set-type public-lr0 router -+ovn-nbctl lsp-set-addresses public-lr0 router -+ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public -+ -+# localnet port -+ovn-nbctl lsp-add public ln-public -+ovn-nbctl lsp-set-type ln-public localnet -+ovn-nbctl lsp-set-addresses ln-public unknown -+ovn-nbctl lsp-set-options ln-public network_name=public -+ -+# schedule the gw router port to a chassis. Change the name of the chassis -+ovn-nbctl --wait=hv lrp-set-gateway-chassis lr0-public hv1 20 -+ -+OVN_POPULATE_ARP -+ovn-nbctl --wait=hv sync -+ -+# And now for the anticlimax. We need to ensure that there is no -+# service monitor in the southbound db. -+ -+AT_CHECK([test 0 = `ovn-sbctl --bare --columns _uuid find \ -+service_monitor | sed '/^$/d' | wc -l`]) -+ -+# Let's also be sure the warning message about SCTP load balancers is -+# is in the ovn-northd log -+ -+AT_CHECK([test 1 = `grep -c "SCTP load balancers do not currently support health checks" northd/ovn-northd.log`]) -+ -+AT_CLEANUP -+ - AT_SETUP([ovn -- ARP/ND request broadcast limiting]) - ovn_start - -diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c -index e80058e..59abe00 100644 ---- a/utilities/ovn-nbctl.c -+++ b/utilities/ovn-nbctl.c -@@ -2734,9 +2734,11 @@ nbctl_lb_add(struct ctl_context *ctx) - /* Validate protocol. 
*/ - lb_proto = ctx->argv[4]; - is_update_proto = true; -- if (strcmp(lb_proto, "tcp") && strcmp(lb_proto, "udp")) { -- ctl_error(ctx, "%s: protocol must be one of \"tcp\", \"udp\".", -- lb_proto); -+ if (strcmp(lb_proto, "tcp") && -+ strcmp(lb_proto, "udp") && -+ strcmp(lb_proto, "sctp")) { -+ ctl_error(ctx, "%s: protocol must be one of \"tcp\", \"udp\", " -+ " or \"sctp\".", lb_proto); - return; - } - } --- -1.8.3.1 - diff --git a/SOURCES/0001-Add-external_ids-column-for-tables-in-nb-schema.patch b/SOURCES/0001-Add-external_ids-column-for-tables-in-nb-schema.patch deleted file mode 100644 index 928a1f1..0000000 --- a/SOURCES/0001-Add-external_ids-column-for-tables-in-nb-schema.patch +++ /dev/null @@ -1,93 +0,0 @@ -From aff9c19d5fb9bb2cfba220d32ec68111d1adb1c5 Mon Sep 17 00:00:00 2001 -From: Tao YunXiang -Date: Wed, 11 Mar 2020 11:37:53 +0800 -Subject: [PATCH] Add external_ids column for tables in nb schema - -"Logical_Router_Policy" and "Forwarding_Group" tables doesn't have -"external_ids" column. I think it is better to add it, so CMS could -fill it with useful information. 
- -Author: Tao YunXiang -Co-authored-by: Liu Chang -Co-authored-by: Rong Yin -Signed-off-by: Tao YunXiang -Signed-off-by: Liu Chang -Signed-off-by: Rong Yin -Acked-by: Numan Siddique -Signed-off-by: Numan Siddique -(cherry picked from upstream commit 45de84bff0224e61847ff52d480c6500153ce699) - -Change-Id: I85671ce34238dd8989756b395d85785c25c8842c ---- - ovn-nb.ovsschema | 10 ++++++++-- - ovn-nb.xml | 12 ++++++++++++ - 2 files changed, 20 insertions(+), 2 deletions(-) - -diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema -index bbd6c25..843e979 100644 ---- a/ovn-nb.ovsschema -+++ b/ovn-nb.ovsschema -@@ -1,7 +1,7 @@ - { - "name": "OVN_Northbound", - "version": "5.20.0", -- "cksum": "987891875 24923", -+ "cksum": "2846067333 25243", - "tables": { - "NB_Global": { - "columns": { -@@ -125,6 +125,9 @@ - "vip": {"type": "string"}, - "vmac": {"type": "string"}, - "liveness": {"type": "boolean"}, -+ "external_ids": { -+ "type": {"key": "string", "value": "string", -+ "min": 0, "max": "unlimited"}}, - "child_port": {"type": {"key": "string", - "min": 1, "max": "unlimited"}}}, - "isRoot": false}, -@@ -366,7 +369,10 @@ - "action": {"type": { - "key": {"type": "string", - "enum": ["set", ["allow", "drop", "reroute"]]}}}, -- "nexthop": {"type": {"key": "string", "min": 0, "max": 1}}}, -+ "nexthop": {"type": {"key": "string", "min": 0, "max": 1}}, -+ "external_ids": { -+ "type": {"key": "string", "value": "string", -+ "min": 0, "max": "unlimited"}}}, - "isRoot": false}, - "NAT": { - "columns": { -diff --git a/ovn-nb.xml b/ovn-nb.xml -index f30cc9e..4a422bb 100644 ---- a/ovn-nb.xml -+++ b/ovn-nb.xml -@@ -1313,6 +1313,12 @@ - - List of child ports in the forwarding group. - -+ -+ -+ -+ See External IDs at the beginning of this document. -+ -+ - - - -@@ -2498,6 +2504,12 @@ - address of a connected router port or the IP address of a logical port. -

- -+ -+ -+ -+ See External IDs at the beginning of this document. -+ -+ -
- - --- -1.8.3.1 - diff --git a/SOURCES/0001-DNS-Make-DNS-lookups-case-insensitive.patch b/SOURCES/0001-DNS-Make-DNS-lookups-case-insensitive.patch deleted file mode 100644 index f4c91ae..0000000 --- a/SOURCES/0001-DNS-Make-DNS-lookups-case-insensitive.patch +++ /dev/null @@ -1,269 +0,0 @@ -From a60b2826bd4eb0144d2dc9b25b63b3a6ca5106c7 Mon Sep 17 00:00:00 2001 -From: Mark Michelson -Date: Mon, 20 Apr 2020 09:25:09 -0400 -Subject: [PATCH 1/2] DNS: Make DNS lookups case insensitive. - -From RFC 1035 Section 2.3.3: - -"For all parts of the DNS that are part of the official protocol, all -comparisons between character strings (e.g., labels, domain names, etc.) -are done in a case-insensitive manner." - -OVN was using case-sensitive lookups and therefore was not complying. -This change makes lookups case insensitive by storing lowercase record -names in the southbound database and converting incoming query names to -lowercase. - -Signed-off-by: Mark Michelson -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1819069 -Reported-by: Jianlin Shi -Acked-by: Numan Siddique ---- - controller/pinctrl.c | 7 ++++- - lib/ovn-util.c | 15 +++++++++++ - lib/ovn-util.h | 5 ++++ - northd/ovn-northd.c | 15 ++++++++++- - ovn-sb.xml | 3 ++- - tests/ovn.at | 61 ++++++++++++++++++++++++++++++++------------ - 6 files changed, 87 insertions(+), 19 deletions(-) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 8703641c2..8592d4e3f 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -2368,7 +2368,12 @@ pinctrl_handle_dns_lookup( - struct dns_data *d = iter->data; - for (size_t i = 0; i < d->n_dps; i++) { - if (d->dps[i] == dp_key) { -- answer_ips = smap_get(&d->records, ds_cstr(&query_name)); -+ /* DNS records in SBDB are stored in lowercase. 
Convert to -+ * lowercase to perform case insensitive lookup -+ */ -+ char *query_name_lower = str_tolower(ds_cstr(&query_name)); -+ answer_ips = smap_get(&d->records, query_name_lower); -+ free(query_name_lower); - if (answer_ips) { - break; - } -diff --git a/lib/ovn-util.c b/lib/ovn-util.c -index 514e2489f..1b30c2e9a 100644 ---- a/lib/ovn-util.c -+++ b/lib/ovn-util.c -@@ -21,6 +21,7 @@ - #include "openvswitch/ofp-parse.h" - #include "ovn-nb-idl.h" - #include "ovn-sb-idl.h" -+#include - - VLOG_DEFINE_THIS_MODULE(ovn_util); - -@@ -550,3 +551,17 @@ ip46_equals(const struct v46_ip *addr1, const struct v46_ip *addr2) - (addr1->family == AF_INET ? addr1->ipv4 == addr2->ipv4 : - IN6_ARE_ADDR_EQUAL(&addr1->ipv6, &addr2->ipv6))); - } -+ -+char * -+str_tolower(const char *orig) -+{ -+ char *copy = xmalloc(strlen(orig) + 1); -+ char *p = copy; -+ -+ while (*orig) { -+ *p++ = tolower(*orig++); -+ } -+ *p = '\0'; -+ -+ return copy; -+} -diff --git a/lib/ovn-util.h b/lib/ovn-util.h -index 11238f61c..4076e8b9a 100644 ---- a/lib/ovn-util.h -+++ b/lib/ovn-util.h -@@ -124,4 +124,9 @@ struct v46_ip { - bool ip46_parse_cidr(const char *str, struct v46_ip *prefix, - unsigned int *plen); - bool ip46_equals(const struct v46_ip *addr1, const struct v46_ip *addr2); -+ -+/* Returns a lowercase copy of orig. -+ * Caller must free the returned string. -+ */ -+char *str_tolower(const char *orig); - #endif -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index f7d3988d7..515722c5d 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -10698,7 +10698,20 @@ sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths) - dns_info->sb_dns, - (struct sbrec_datapath_binding **)dns_info->sbs, - dns_info->n_sbs); -- sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records); -+ -+ /* DNS lookups are case-insensitive. 
Convert records to lowercase so -+ * we can do consistent lookups when DNS requests arrive -+ */ -+ struct smap lower_records = SMAP_INITIALIZER(&lower_records); -+ struct smap_node *node; -+ SMAP_FOR_EACH (node, &dns_info->nb_dns->records) { -+ smap_add_nocopy(&lower_records, xstrdup(node->key), -+ str_tolower(node->value)); -+ } -+ -+ sbrec_dns_set_records(dns_info->sb_dns, &lower_records); -+ -+ smap_destroy(&lower_records); - free(dns_info->sbs); - free(dns_info); - } -diff --git a/ovn-sb.xml b/ovn-sb.xml -index 72466b97e..5f8da534c 100644 ---- a/ovn-sb.xml -+++ b/ovn-sb.xml -@@ -3597,7 +3597,8 @@ tcp.flags = RST; - - Key-value pair of DNS records with DNS query name as the key - and a string of IP address(es) separated by comma or space as the -- value. -+ value. ovn-northd stores the DNS query name in all lowercase in order to -+ facilitate case-insensitive lookups. - -

Example: "vm1.ovn.org" = "10.0.0.4 aef0::4"

-
-diff --git a/tests/ovn.at b/tests/ovn.at -index 0f02e8144..b78637044 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -8328,6 +8328,12 @@ set_dns_params() { - # IPv4 address - 10.0.0.4 - expected_dns_answer=${query_name}00010001${ttl}00040a000004 - ;; -+ VM1) -+ # VM1.OVN.ORG -+ query_name=03564d31034f564e034f524700 -+ # IPv4 address - 10.0.0.4 -+ expected_dns_answer=${query_name}00010001${ttl}00040a000004 -+ ;; - vm2) - # vm2.ovn.org - query_name=03766d32036f766e036f726700 -@@ -8490,6 +8496,29 @@ reset_pcap_file hv1-vif2 hv1/vif2 - rm -f 1.expected - rm -f 2.expected - -+# Try vm1 again but an all-caps query name -+ -+set_dns_params VM1 -+src_ip=`ip_to_hex 10 0 0 6` -+dst_ip=`ip_to_hex 10 0 0 1` -+dns_reply=1 -+test_dns 2 f00000000002 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data $dns_resp_data -+ -+# NXT_RESUMEs should be 3. -+OVS_WAIT_UNTIL([test 3 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+ -+$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets -+cat 2.expected | cut -c -48 > expout -+AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) -+# Skipping the IPv4 checksum. -+cat 2.expected | cut -c 53- > expout -+AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) -+ -+reset_pcap_file hv1-vif1 hv1/vif1 -+reset_pcap_file hv1-vif2 hv1/vif2 -+rm -f 1.expected -+rm -f 2.expected -+ - # Clear the query name options for ls1-lp2 - ovn-nbctl --wait=hv remove DNS $DNS1 records vm2.ovn.org - -@@ -8499,8 +8528,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=0 - test_dns 1 f00000000001 f00000000002 $src_ip $dst_ip $dns_reply $dns_req_data - --# NXT_RESUMEs should be 3. --OVS_WAIT_UNTIL([test 3 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 4. 
-+OVS_WAIT_UNTIL([test 4 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > 1.packets - AT_CHECK([cat 1.packets], [0], []) -@@ -8521,8 +8550,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=0 - test_dns 2 f00000000002 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data - --# NXT_RESUMEs should be 3 only. --OVS_WAIT_UNTIL([test 3 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 4 only. -+OVS_WAIT_UNTIL([test 4 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets - AT_CHECK([cat 2.packets], [0], []) -@@ -8542,8 +8571,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=1 - test_dns 2 f00000000002 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data $dns_resp_data - --# NXT_RESUMEs should be 4. --OVS_WAIT_UNTIL([test 4 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 5. -+OVS_WAIT_UNTIL([test 5 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets - cat 2.expected | cut -c -48 > expout -@@ -8564,8 +8593,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=1 - test_dns 2 f00000000002 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data $dns_resp_data - --# NXT_RESUMEs should be 5. --OVS_WAIT_UNTIL([test 5 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 6. -+OVS_WAIT_UNTIL([test 6 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets - cat 2.expected | cut -c -48 > expout -@@ -8586,8 +8615,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=0 - test_dns 2 f00000000002 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data - --# NXT_RESUMEs should be 6. --OVS_WAIT_UNTIL([test 6 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 7. 
-+OVS_WAIT_UNTIL([test 7 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets - AT_CHECK([cat 2.packets], [0], []) -@@ -8604,8 +8633,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=0 - test_dns 2 f00000000002 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data - --# NXT_RESUMEs should be 7. --OVS_WAIT_UNTIL([test 7 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 8. -+OVS_WAIT_UNTIL([test 8 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets - AT_CHECK([cat 2.packets], [0], []) -@@ -8624,8 +8653,8 @@ dst_ip=`ip_to_hex 10 0 0 1` - dns_reply=1 - test_dns 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data $dns_resp_data - --# NXT_RESUMEs should be 8. --OVS_WAIT_UNTIL([test 8 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 9. -+OVS_WAIT_UNTIL([test 9 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > 1.packets - cat 1.expected | cut -c -48 > expout -@@ -8646,8 +8675,8 @@ dst_ip=aef00000000000000000000000000001 - dns_reply=1 - test_dns6 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data $dns_resp_data - --# NXT_RESUMEs should be 9. --OVS_WAIT_UNTIL([test 9 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+# NXT_RESUMEs should be 10 -+OVS_WAIT_UNTIL([test 10 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > 1.packets - # Skipping the UDP checksum. 
--- -2.25.1 - diff --git a/SOURCES/0001-Disable-IPv6-prefix-reporting-if-IPv6-PD-is-disabled.patch b/SOURCES/0001-Disable-IPv6-prefix-reporting-if-IPv6-PD-is-disabled.patch deleted file mode 100644 index 5168b84..0000000 --- a/SOURCES/0001-Disable-IPv6-prefix-reporting-if-IPv6-PD-is-disabled.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 0b9d16670d5561d8300d2448cbd4686a3acdc57e Mon Sep 17 00:00:00 2001 -Message-Id: <0b9d16670d5561d8300d2448cbd4686a3acdc57e.1588608928.git.lorenzo.bianconi@redhat.com> -From: Lorenzo Bianconi -Date: Wed, 22 Apr 2020 16:13:03 +0200 -Subject: [PATCH 1/3] Disable IPv6 prefix reporting if IPv6 PD is disabled - -Disable IPv6 prefix delegation reporting in Logical_Router_Port table if -IPv6 prefix delegation state machine has been disabled for the related -logical router port - -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - northd/ovn-northd.c | 28 ++++++++++++++++------------ - tests/system-ovn.at | 7 +++++++ - 2 files changed, 23 insertions(+), 12 deletions(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index bc1ea0bd3..431c511c3 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -2703,6 +2703,10 @@ ovn_update_ipv6_prefix(struct hmap *ports) - continue; - } - -+ if (!smap_get_bool(&op->nbrp->options, "prefix", false)) { -+ continue; -+ } -+ - char prefix[IPV6_SCAN_LEN + 6]; - unsigned aid; - const char *ipv6_pd_list = smap_get(&op->sb->options, -@@ -9364,22 +9368,22 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - } - - struct smap options; -+ smap_clone(&options, &op->sb->options); -+ - /* enable IPv6 prefix delegation */ - bool prefix_delegation = smap_get_bool(&op->nbrp->options, - "prefix_delegation", false); -- if (prefix_delegation) { -- smap_clone(&options, &op->sb->options); -- smap_add(&options, "ipv6_prefix_delegation", "true"); -- sbrec_port_binding_set_options(op->sb, &options); -- smap_destroy(&options); -- } -+ smap_add(&options, "ipv6_prefix_delegation", 
-+ prefix_delegation ? "true" : "false"); -+ sbrec_port_binding_set_options(op->sb, &options); - -- if (smap_get_bool(&op->nbrp->options, "prefix", false)) { -- smap_clone(&options, &op->sb->options); -- smap_add(&options, "ipv6_prefix", "true"); -- sbrec_port_binding_set_options(op->sb, &options); -- smap_destroy(&options); -- } -+ bool ipv6_prefix = smap_get_bool(&op->nbrp->options, -+ "prefix", false); -+ smap_add(&options, "ipv6_prefix", -+ ipv6_prefix ? "true" : "false"); -+ sbrec_port_binding_set_options(op->sb, &options); -+ -+ smap_destroy(&options); - - const char *address_mode = smap_get( - &op->nbrp->ipv6_ra_configs, "address_mode"); -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 3b11cf92b..fa3b83cb1 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3946,6 +3946,13 @@ OVS_WAIT_UNTIL([ - test "${total_pkts}" = "1" - ]) - -+ovn-nbctl set logical_router_port rp-sw0 options:prefix=false -+ovn-nbctl clear logical_router_port rp-sw0 ipv6_prefix -+OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut -c3-16)" = "[2001:1db8:3333]"]) -+AT_CHECK([ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut -c3-16], [0], [dnl -+[] -+]) -+ - kill $(pidof tcpdump) - kill $(pidof ovn-controller) - --- -2.26.2 - diff --git a/SOURCES/0001-Fix-ACL-reject-action-for-UDP-packets.patch b/SOURCES/0001-Fix-ACL-reject-action-for-UDP-packets.patch deleted file mode 100644 index c32d975..0000000 --- a/SOURCES/0001-Fix-ACL-reject-action-for-UDP-packets.patch +++ /dev/null @@ -1,525 +0,0 @@ -From cfb0f49b644f2a253cc1365c219d1bb78c2cacac Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Fri, 24 Apr 2020 12:19:09 +0530 -Subject: [PATCH] Fix ACL reject action for UDP packets. - -The icmp packet generated by ovn-controller for reject ACL action -for non TCP packets is not getting delivered to the sender of -the original packet. 
This is because the icmp packets are skipped -from out_pre_lb/out_pre_acl logical switch egress pipeline and this -results in these icmp packets getting dropped in the ACL stage because -of invalid ct flags. This patch fixes this issue by removing those logical -flows. The IP checksum generated by ovn-controller is invalid. This patch -fixes this issue as well. - -Tested-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique - -(cherry-picked from upstream commit f792b1a00b439a949e3b7aae4951f8513340c1a1) - -Change-Id: I9837991cf0981f57dc92d1309f0f453c800d7937 ---- - controller/pinctrl.c | 102 ++++++++++++++++++++++++++++--------------- - northd/ovn-northd.c | 22 +++++----- - tests/ovn.at | 46 +++++++++---------- - tests/system-ovn.at | 95 ++++++++++++++++++++++++++++++++-------- - 4 files changed, 177 insertions(+), 88 deletions(-) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index f0d63b9a6..9d5b7c3c0 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -1465,7 +1465,7 @@ static void - pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow, - struct dp_packet *pkt_in, - const struct match *md, struct ofpbuf *userdata, -- bool include_orig_ip_datagram) -+ bool set_icmp_code) - { - /* This action only works for IP packets, and the switch should only send - * us IP packets this way, but check here just to be sure. */ -@@ -1512,46 +1512,51 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow, - packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst, - ip_flow->nw_tos, 255); - -+ uint8_t icmp_code = 1; -+ if (set_icmp_code && in_ip->ip_proto == IPPROTO_UDP) { -+ icmp_code = 3; -+ } -+ - struct icmp_header *ih = dp_packet_put_zeros(&packet, sizeof *ih); - dp_packet_set_l4(&packet, ih); -- packet_set_icmp(&packet, ICMP4_DST_UNREACH, 1); -- -- if (include_orig_ip_datagram) { -- /* RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes -- * of header. MAY send more. 
-- * RFC says return as much as we can without exceeding 576 -- * bytes. -- * So, lets return as much as we can. */ -- -- /* Calculate available room to include the original IP + data. */ -- nh = dp_packet_l3(&packet); -- uint16_t room = 576 - (sizeof *eh + ntohs(nh->ip_tot_len)); -- if (in_ip_len > room) { -- in_ip_len = room; -- } -- dp_packet_put(&packet, in_ip, in_ip_len); -- -- /* dp_packet_put may reallocate the buffer. Get the l3 and l4 -- * header pointers again. */ -- nh = dp_packet_l3(&packet); -- ih = dp_packet_l4(&packet); -- uint16_t ip_total_len = ntohs(nh->ip_tot_len) + in_ip_len; -- nh->ip_tot_len = htons(ip_total_len); -- ih->icmp_csum = 0; -- ih->icmp_csum = csum(ih, sizeof *ih + in_ip_len); -- nh->ip_csum = 0; -- nh->ip_csum = csum(nh, sizeof *nh); -- } -+ packet_set_icmp(&packet, ICMP4_DST_UNREACH, icmp_code); -+ -+ /* RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes -+ * of header. MAY send more. -+ * RFC says return as much as we can without exceeding 576 -+ * bytes. -+ * So, lets return as much as we can. */ -+ -+ /* Calculate available room to include the original IP + data. */ -+ nh = dp_packet_l3(&packet); -+ uint16_t room = 576 - (sizeof *eh + ntohs(nh->ip_tot_len)); -+ if (in_ip_len > room) { -+ in_ip_len = room; -+ } -+ dp_packet_put(&packet, in_ip, in_ip_len); -+ -+ /* dp_packet_put may reallocate the buffer. Get the l3 and l4 -+ * header pointers again. 
*/ -+ nh = dp_packet_l3(&packet); -+ ih = dp_packet_l4(&packet); -+ uint16_t ip_total_len = ntohs(nh->ip_tot_len) + in_ip_len; -+ nh->ip_tot_len = htons(ip_total_len); -+ ih->icmp_csum = 0; -+ ih->icmp_csum = csum(ih, sizeof *ih + in_ip_len); -+ nh->ip_csum = 0; -+ nh->ip_csum = csum(nh, sizeof *nh); -+ - } else { - struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh); - struct icmp6_data_header *ih; - uint32_t icmpv6_csum; -+ struct ip6_hdr *in_ip = dp_packet_l3(pkt_in); - - eh->eth_type = htons(ETH_TYPE_IPV6); - dp_packet_set_l3(&packet, nh); - nh->ip6_vfc = 0x60; - nh->ip6_nxt = IPPROTO_ICMPV6; -- nh->ip6_plen = htons(sizeof(*nh) + ICMP6_DATA_HEADER_LEN); -+ nh->ip6_plen = htons(ICMP6_DATA_HEADER_LEN); - packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst, - ip_flow->nw_tos, ip_flow->ipv6_label, 255); - -@@ -1559,15 +1564,42 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow, - dp_packet_set_l4(&packet, ih); - ih->icmp6_base.icmp6_type = ICMP6_DST_UNREACH; - ih->icmp6_base.icmp6_code = 1; -+ -+ if (set_icmp_code && in_ip->ip6_nxt == IPPROTO_UDP) { -+ ih->icmp6_base.icmp6_code = ICMP6_DST_UNREACH_NOPORT; -+ } - ih->icmp6_base.icmp6_cksum = 0; - -- uint8_t *data = dp_packet_put_zeros(&packet, sizeof *nh); -- memcpy(data, dp_packet_l3(pkt_in), sizeof(*nh)); -+ nh = dp_packet_l3(&packet); -+ -+ /* RFC 4443: 3.1. 
-+ * -+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -+ * | Type | Code | Checksum | -+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -+ * | Unused | -+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -+ * | As much of invoking packet | -+ * + as possible without the ICMPv6 packet + -+ * | exceeding the minimum IPv6 MTU [IPv6] | -+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -+ */ -+ -+ uint16_t room = 1280 - (sizeof *eh + sizeof *nh + -+ ICMP6_DATA_HEADER_LEN); -+ uint16_t in_ip_len = (uint16_t) sizeof *in_ip + ntohs(in_ip->ip6_plen); -+ if (in_ip_len > room) { -+ in_ip_len = room; -+ } -+ -+ dp_packet_put(&packet, in_ip, in_ip_len); -+ nh->ip6_plen = htons(ICMP6_DATA_HEADER_LEN + in_ip_len); - - icmpv6_csum = packet_csum_pseudoheader6(dp_packet_l3(&packet)); - ih->icmp6_base.icmp6_cksum = csum_finish( - csum_continue(icmpv6_csum, ih, -- sizeof(*nh) + ICMP6_DATA_HEADER_LEN)); -+ in_ip_len + ICMP6_DATA_HEADER_LEN)); - } - - if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) { -@@ -2658,12 +2690,12 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) - - case ACTION_OPCODE_ICMP: - pinctrl_handle_icmp(swconn, &headers, &packet, &pin.flow_metadata, -- &userdata, false); -+ &userdata, true); - break; - - case ACTION_OPCODE_ICMP4_ERROR: - pinctrl_handle_icmp(swconn, &headers, &packet, &pin.flow_metadata, -- &userdata, true); -+ &userdata, false); - break; - - case ACTION_OPCODE_TCP_RESET: -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 4efe4a0c3..ec77ae1a8 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -4736,12 +4736,10 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) - * Not to do conntrack on ND and ICMP destination - * unreachable packets. 
*/ - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, -- "nd || nd_rs || nd_ra || icmp4.type == 3 || " -- "icmp6.type == 1 || " -+ "nd || nd_rs || nd_ra || " - "(udp && udp.src == 546 && udp.dst == 547)", "next;"); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, -- "nd || nd_rs || nd_ra || icmp4.type == 3 || " -- "icmp6.type == 1 || " -+ "nd || nd_rs || nd_ra || " - "(udp && udp.src == 546 && udp.dst == 547)", "next;"); - - /* Ingress and Egress Pre-ACL Table (Priority 100). -@@ -4853,12 +4851,10 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, - { - /* Do not send ND packets to conntrack */ - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, -- "nd || nd_rs || nd_ra || icmp4.type == 3 ||" -- "icmp6.type == 1", -+ "nd || nd_rs || nd_ra", - "next;"); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, -- "nd || nd_rs || nd_ra || icmp4.type == 3 ||" -- "icmp6.type == 1", -+ "nd || nd_rs || nd_ra", - "next;"); - - /* Do not send service monitor packets to conntrack. */ -@@ -5037,9 +5033,10 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows, - ds_put_format(&actions, "%s ", extra_actions->string); - } - ds_put_format(&actions, "reg0 = 0; " -- "eth.dst <-> eth.src; ip4.dst <-> ip4.src; " -- "icmp4 { outport <-> inport; %s };", -- ingress ? "output;" : "next(pipeline=ingress,table=0);"); -+ "icmp4 { eth.dst <-> eth.src; ip4.dst <-> ip4.src; " -+ "outport <-> inport; %s };", -+ ingress ? "next(pipeline=egress,table=5);" -+ : "next(pipeline=ingress,table=19);"); - ovn_lflow_add_with_hint(lflows, od, stage, - acl->priority + OVN_ACL_PRI_OFFSET, - ds_cstr(&match), ds_cstr(&actions), stage_hint); -@@ -5056,7 +5053,8 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows, - ds_put_format(&actions, "reg0 = 0; icmp6 { " - "eth.dst <-> eth.src; ip6.dst <-> ip6.src; " - "outport <-> inport; %s };", -- ingress ? "output;" : "next(pipeline=ingress,table=0);"); -+ ingress ? 
"next(pipeline=egress,table=5);" -+ : "next(pipeline=ingress,table=19);"); - ovn_lflow_add_with_hint(lflows, od, stage, - acl->priority + OVN_ACL_PRI_OFFSET, - ds_cstr(&match), ds_cstr(&actions), stage_hint); -diff --git a/tests/ovn.at b/tests/ovn.at -index defe00a40..35415f2b6 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -11737,13 +11737,13 @@ test_ip_packet() { - - local ip_ttl=ff - local packet=${eth_dst}${eth_src}08004500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst} -- -+ local orig_pkt_in_reply=4500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst} - local reply_icmp_ttl=ff - local icmp_type_code_response=0301 - local icmp_data=00000000 - local reply_icmp_payload=${icmp_type_code_response}${exp_icmp_chksum}${icmp_data} -- local reply=${eth_src}${eth_dst}08004500001c00004000${reply_icmp_ttl}01${exp_ip_chksum}${ipv4_dst}${ipv4_src}${reply_icmp_payload} -- echo $reply >> vif$inport.expected -+ local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ipv4_dst}${ipv4_src}${reply_icmp_payload} -+ echo $reply$orig_pkt_in_reply >> vif$inport.expected - - as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet - } -@@ -11760,7 +11760,7 @@ test_ipv6_packet() { - local ip6_hdr=6000000000083aff${ipv6_src}${ipv6_dst} - local packet=${eth_dst}${eth_src}86dd${ip6_hdr}0000000000000000 - -- local reply=${eth_src}${eth_dst}86dd6000000000303aff${ipv6_dst}${ipv6_src}0101${exp_icmp_chksum}00000000${ip6_hdr} -+ local reply=${eth_src}${eth_dst}86dd6000000000383aff${ipv6_dst}${ipv6_src}0101${exp_icmp_chksum}00000000${ip6_hdr}0000000000000000 - echo $reply >> vif$inport.expected - - as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet -@@ -11838,11 +11838,11 @@ ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p21\"" reject - # Allow some time for ovn-northd and ovn-controller to catch up. 
- ovn-nbctl --timeout=3 --wait=hv sync - --test_ip_packet 11 1 000000000011 000000000021 $(ip_to_hex 192 168 1 11) $(ip_to_hex 192 168 1 21) 0000 7d8d fcfe --test_ip_packet 21 2 000000000021 000000000011 $(ip_to_hex 192 168 1 21) $(ip_to_hex 192 168 1 11) 0000 7d8d fcfe --test_ip_packet 31 3 000000000031 000000000012 $(ip_to_hex 192 168 1 31) $(ip_to_hex 192 168 1 12) 0000 7d82 fcfe -+test_ip_packet 11 1 000000000011 000000000021 $(ip_to_hex 192 168 1 11) $(ip_to_hex 192 168 1 21) 0000 f85b f576 -+test_ip_packet 21 2 000000000021 000000000011 $(ip_to_hex 192 168 1 21) $(ip_to_hex 192 168 1 11) 0000 f85b f576 -+test_ip_packet 31 3 000000000031 000000000012 $(ip_to_hex 192 168 1 31) $(ip_to_hex 192 168 1 12) 0000 f850 f56b - --test_ipv6_packet 11 1 000000000011 000000000021 fe80000000000000020001fffe000001 fe80000000000000020001fffe000002 6183 -+test_ipv6_packet 11 1 000000000011 000000000021 fe80000000000000020001fffe000001 fe80000000000000020001fffe000002 617b - - test_tcp_syn_packet 11 1 000000000011 000000000021 $(ip_to_hex 192 168 1 11) $(ip_to_hex 192 168 1 21) 0000 8b40 3039 0000 b85f 70e4 - test_tcp_syn_packet 21 2 000000000021 000000000011 $(ip_to_hex 192 168 1 21) $(ip_to_hex 192 168 1 11) 0000 8b40 3039 0000 b85f 70e4 -@@ -12795,13 +12795,13 @@ test_ip_packet() { - - local ip_ttl=01 - local packet=${eth_dst}${eth_src}08004500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst} -- -+ local orig_pkt_in_reply=4500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst} - local reply_icmp_ttl=fe - local icmp_type_code_response=0b00 - local icmp_data=00000000 - local reply_icmp_payload=${icmp_type_code_response}${exp_icmp_chksum}${icmp_data} -- local reply=${eth_src}${eth_dst}08004500001c00004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload} -- echo $reply >> vif$inport.expected -+ local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload} -+ echo 
$reply$orig_pkt_in_reply >> vif$inport.expected - - as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet - } -@@ -12819,7 +12819,7 @@ test_ip6_packet() { - local ip6_hdr=6000000000151101${ipv6_src}${ipv6_dst} - local packet=${eth_dst}${eth_src}86dd${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a - -- local reply=${eth_src}${eth_dst}86dd6000000000303afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr} -+ local reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a - echo $reply >> vif$inport.expected - - as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet -@@ -12861,8 +12861,8 @@ OVN_POPULATE_ARP - # allow some time for ovn-northd and ovn-controller to catch up. - ovn-nbctl --wait=hv sync - --test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 7dae f4ff --test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000200000000000000000011 20010db8000100000000000000000001 d461 -+test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96 -+test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000200000000000000000011 20010db8000100000000000000000001 1c22 - OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [vif1.expected]) - - OVN_CLEANUP([hv1], [hv2]) -@@ -12891,12 +12891,12 @@ test_ip_packet() { - - local ip_ttl=ff - local packet=${eth_dst}${eth_src}08004500001400004000${ip_ttl}${l4_proto}${ip_chksum}${ipv4_src}${ip_router} -- -+ local orig_pkt_in_reply=4500001400004000${ip_ttl}${l4_proto}${ip_chksum}${ipv4_src}${ip_router} - local reply_icmp_ttl=fe - local icmp_data=00000000 - local reply_icmp_payload=${exp_icmp_code}${exp_icmp_chksum}${icmp_data} -- local 
reply=${eth_src}${eth_dst}08004500001c00004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload} -- echo $reply >> vif$inport.expected -+ local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload} -+ echo $reply$orig_pkt_in_reply >> vif$inport.expected - - as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet - } -@@ -12962,7 +12962,9 @@ test_ip6_packet() { - local ip6_hdr=60000000${ipv6_len}${ipv6_proto}ff${ipv6_src}${ipv6_dst} - local packet=${eth_dst}${eth_src}86dd${ip6_hdr}${data} - -- local reply=${eth_src}${eth_dst}86dd6000000000303afe${ipv6_dst}${ipv6_src}${exp_icmp_code}${exp_icmp_chksum}00000000${ip6_hdr} -+ local reply_ip_len=`expr 48 + ${#data} / 2` -+ reply_ip_len=$(printf "%x" $reply_ip_len) -+ local reply=${eth_src}${eth_dst}86dd6000000000${reply_ip_len}3afe${ipv6_dst}${ipv6_src}${exp_icmp_code}${exp_icmp_chksum}00000000${ip6_hdr}${data} - echo $reply >> vif$inport.expected - - as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet -@@ -13004,13 +13006,13 @@ OVN_POPULATE_ARP - # allow some time for ovn-northd and ovn-controller to catch up. 
- ovn-nbctl --wait=hv sync - --test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 1 254) 11 0000 7dae fcfc 0303 --test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 1 254) 84 0000 7dae fcfd 0302 --test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000100000000000000000001 11 0015 dbb8303900155bac6b646f65206676676e6d66720a 0104 d570 -+test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 1 254) 11 0000 f87c f485 0303 -+test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 1 254) 84 0000 f87c f413 0302 -+test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000100000000000000000001 11 0015 dbb8303900155bac6b646f65206676676e6d66720a 0104 1d31 - OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [vif1.expected]) - - test_tcp_syn_packet 2 2 000000000002 00000000ff02 $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 2 254) 0000 8b40 3039 0000 b680 6e05 --test_ip6_packet 2 2 000000000002 00000000ff02 20010db8000200000000000000000011 20010db8000200000000000000000001 84 0004 01020304 0103 627e -+test_ip6_packet 2 2 000000000002 00000000ff02 20010db8000200000000000000000011 20010db8000200000000000000000001 84 0004 01020304 0103 5e74 - test_tcp6_packet 2 2 000000000002 00000000ff02 20010db8000200000000000000000011 20010db8000200000000000000000001 8b40 3039 0000 98cd - OVN_CHECK_PACKETS([hv2/vif2-tx.pcap], [vif2.expected]) - -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index fa3b83cb1..117f1e835 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3697,7 +3697,7 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d - AT_CLEANUP - - --AT_SETUP([ovn -- ACL reject - TCP reset]) -+AT_SETUP([ovn -- ACL reject]) - AT_SKIP_IF([test $HAVE_NC = no]) - AT_KEYWORDS([lb]) - -@@ -3736,13 +3736,14 @@ ovn-nbctl acl-add pg0_drop from-lport 1001 "inport == 
@pg0_drop && ip" drop - ovn-nbctl acl-add pg0_drop to-lport 1001 "outport == @pg0_drop && ip" drop - - ovn-nbctl pg-add pg0 sw0-p1-rej sw0-p2-rej --ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4" allow-related -+ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip" allow-related - ovn-nbctl --log acl-add pg0 from-lport 1004 "inport == @pg0 && ip && tcp && tcp.dst == 80" reject -+ovn-nbctl --log acl-add pg0 from-lport 1004 "inport == @pg0 && ip && udp && udp.dst == 90" reject - --ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related - ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 82" allow-related - ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 82" allow-related - ovn-nbctl --log acl-add pg0 to-lport 1004 "inport == @pg0 && ip && tcp && tcp.dst == 84" reject -+ovn-nbctl --log acl-add pg0 to-lport 1004 "inport == @pg0 && ip && udp && udp.dst == 94" reject - - OVN_POPULATE_ARP - ovn-nbctl --wait=hv sync -@@ -3758,33 +3759,38 @@ ADD_VETH(sw0-p2-rej, sw0-p2-rej, br-int, "10.0.0.4/24", "50:54:00:00:00:04", \ - NS_CHECK_EXEC([sw0-p1-rej], [ip a a aef0::3/64 dev sw0-p1-rej], [0]) - NS_CHECK_EXEC([sw0-p2-rej], [ip a a aef0::4/64 dev sw0-p2-rej], [0]) - --# Capture packets in sw0-p1-rej. --NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 2 -i sw0-p1-rej tcp port 80 > sw0-p1-rej-ip4.pcap &], [0]) - sleep 1 - --NS_CHECK_EXEC([sw0-p1-rej], [nc 10.0.0.4 80], [1], [], --[dnl --Ncat: Connection refused. --]) -+# Capture packets in sw0-p1-rej. -+NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 4 -i sw0-p1-rej tcp > sw0-p1-rej-ip4.pcap &], [0]) -+ -+sleep 1 - - OVS_WAIT_UNTIL([ -- total=`cat sw0-p1-rej-ip4.pcap | wc -l` -- echo "total = $total" -- test "${total}" = "2" -+ ip netns exec sw0-p1-rej nc 10.0.0.4 80 2> r -+ res=$(cat r) -+ test "$res" = "Ncat: Connection refused." 
- ]) - - # Now send traffic to port 84 --NS_CHECK_EXEC([sw0-p1-rej], [nc 10.0.0.4 84], [1], [], --[dnl --Ncat: Connection refused. -+OVS_WAIT_UNTIL([ -+ ip netns exec sw0-p1-rej nc 10.0.0.4 84 2> r -+ res=$(cat r) -+ test "$res" = "Ncat: Connection refused." - ]) - --AT_CHECK([ -+OVS_WAIT_UNTIL([ - n_pkt=$(ovs-ofctl dump-flows br-int table=44 | grep -v n_packets=0 | \ - grep controller | grep tp_dst=84 -c) - test $n_pkt -eq 1 - ]) - -+OVS_WAIT_UNTIL([ -+ total=`cat sw0-p1-rej-ip4.pcap | wc -l` -+ echo "total = $total" -+ test "${total}" = "4" -+]) -+ - # Without this sleep, test case fails intermittently. - sleep 3 - -@@ -3792,17 +3798,68 @@ NS_CHECK_EXEC([sw0-p2-rej], [tcpdump -n -c 2 -i sw0-p2-rej tcp port 80 > sw0-p2- - - sleep 1 - --NS_CHECK_EXEC([sw0-p2-rej], [nc -6 aef0::3 80], [1], [], --[dnl --Ncat: Connection refused. -+OVS_WAIT_UNTIL([ -+ ip netns exec sw0-p2-rej nc -6 aef0::3 80 2> r -+ res=$(cat r) -+ test "$res" = "Ncat: Connection refused." - ]) - -+ - OVS_WAIT_UNTIL([ - total=`cat sw0-p2-rej-ip6.pcap | wc -l` - echo "total = $total" - test "${total}" = "2" - ]) - -+# Now test for IPv4 UDP. 
-+NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej udp port 90 > sw0-p1-rej-udp.pcap &], [0]) -+NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej icmp > sw0-p1-rej-icmp.pcap &], [0]) -+ -+echo "foo" > foo -+OVS_WAIT_UNTIL([ -+ ip netns exec sw0-p1-rej nc -u 10.0.0.4 90 < foo -+ c=$(cat sw0-p1-rej-icmp.pcap | grep \ -+"10.0.0.4 > 10.0.0.3: ICMP 10.0.0.4 udp port dnsix unreachable" | uniq | wc -l) -+ test $c -eq 1 -+]) -+ -+rm -f *.pcap -+ -+NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej udp port 94 > sw0-p1-rej-udp.pcap &], [0]) -+NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej icmp > sw0-p1-rej-icmp.pcap &], [0]) -+ -+OVS_WAIT_UNTIL([ -+ ip netns exec sw0-p1-rej nc -u 10.0.0.4 94 < foo -+ c=$(cat sw0-p1-rej-icmp.pcap | grep \ -+"10.0.0.4 > 10.0.0.3: ICMP 10.0.0.4 udp port objcall unreachable" | uniq | wc -l) -+ test $c -eq 1 -+]) -+ -+# Now test for IPv6 UDP. -+NS_CHECK_EXEC([sw0-p2-rej], [tcpdump -n -c 1 -i sw0-p2-rej udp port 90 > sw0-p2-rej-ip6-udp.pcap &], [0]) -+NS_CHECK_EXEC([sw0-p2-rej], [tcpdump -n -c 1 -i sw0-p2-rej icmp6 > sw0-p2-rej-icmp6.pcap &], [0]) -+ -+OVS_WAIT_UNTIL([ -+ ip netns exec sw0-p2-rej nc -u -6 aef0::3 90 < foo -+ c=$(cat sw0-p2-rej-icmp6.pcap | grep \ -+"IP6 aef0::3 > aef0::4: ICMP6, destination unreachable, unreachable port, \ -+aef0::3 udp port dnsix" | uniq | wc -l) -+ test $c -eq 1 -+]) -+ -+rm -f *.pcap -+ -+NS_CHECK_EXEC([sw0-p2-rej], [tcpdump -n -c 1 -i sw0-p2-rej udp port 94 > sw0-p2-rej-ip6-udp.pcap &], [0]) -+NS_CHECK_EXEC([sw0-p2-rej], [tcpdump -n -c 1 -i sw0-p2-rej icmp6 > sw0-p2-rej-icmp6.pcap &], [0]) -+ -+OVS_WAIT_UNTIL([ -+ ip netns exec sw0-p2-rej nc -u -6 aef0::3 94 < foo -+ c=$(cat sw0-p2-rej-icmp6.pcap | grep \ -+"IP6 aef0::3 > aef0::4: ICMP6, destination unreachable, unreachable port, \ -+aef0::3 udp port objcall" | uniq | wc -l) -+ test $c -eq 1 -+]) -+ - OVS_APP_EXIT_AND_WAIT([ovn-controller]) - - as ovn-sb --- -2.26.2 - diff --git 
a/SOURCES/0001-Fix-conntrack-entry-leaks-because-of-TCP-RST-packets.patch b/SOURCES/0001-Fix-conntrack-entry-leaks-because-of-TCP-RST-packets.patch deleted file mode 100644 index 2ec0cb6..0000000 --- a/SOURCES/0001-Fix-conntrack-entry-leaks-because-of-TCP-RST-packets.patch +++ /dev/null @@ -1,473 +0,0 @@ -From 685c685b0070731524869459f96b7b690e12ae74 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Thu, 23 Apr 2020 18:08:48 +0530 -Subject: [PATCH] Fix conntrack entry leaks because of TCP RST packets not sent - to conntrack. - -The commit [1] - 28097d5adb95("Fix tcp_reset action handling") fixed an issue -with tcp_reset OVN action. In order to fix that issue, this commit added -logical flows to skip all the TCP RST packets from conntrack. -Ideally it should have skipped only the TCP RST packets generated by -ovn-controller from conntrack. Since all the TCP RST packets are -skipped from conntrack, the connections in conntrack remain in -ESTABLISHED state even if the client/server sends TCP RST to close the -connection. And these entries live for a long time and this is -causing performance issues as reported in the BZ. - -This patch reverts the logical flows added in [1] and modifies the inner -actions of tcp_reset in the ingress logical switch pipeline -from - "tcp_reset { outport <-> inport; output; }" -to "tcp_reset { output <-> inport; next(pipeline=egress,table=5); }". -This causes the packet to resubmit to the egress table ls_out_qos_mark -skipping the egress ACL stage. Prior to this packet, next action was -not allowing a resubmit from ingress to egress pipeline. This patch -relaxes this limitation. - -For the tcp_reset action in the egress logical switch pipeline, this patch -modifies the inner action -from - "tcp_reset { outport <-> inport; next(pipeline=ingress,table=0); }" -to - "tcp_reset { outport <-> inport; next(pipeline=ingress,table=19); }". -This causes the packet to enter the ingress table ls_in_l2_lkup. 
- -We don't see similar conntrack leaks with UDP. Although there is an issue -with the acl reject action for UDP packets. When ovn-controller generates icmp -destination unreachable packet, it doesn't get delivered. And the IP checksum is -incorrect in this packet. A follow up patch will fix these issues. - -[1] - 28097d5adb95("Fix tcp_reset action handling") - -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1819785 -Co-Authored-by: Tim Rozet -Signed-off-by: Tim Rozet -Acked-by: Dumitru Ceara -Acked-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - AUTHORS.rst | 1 + - lib/actions.c | 6 +- - northd/ovn-northd.8.xml | 8 ++ - northd/ovn-northd.c | 14 ++-- - ovn-sb.xml | 10 ++- - tests/automake.mk | 3 +- - tests/ovn.at | 6 +- - tests/system-ovn.at | 170 +++++++++++++++++++++++++++++++++++----- - tests/test-tcp-rst.py | 37 +++++++++ - 9 files changed, 217 insertions(+), 38 deletions(-) - create mode 100644 tests/test-tcp-rst.py - -diff --git a/AUTHORS.rst b/AUTHORS.rst -index 230e487f0..c80fc1bae 100644 ---- a/AUTHORS.rst -+++ b/AUTHORS.rst -@@ -355,6 +355,7 @@ Thomas F. 
Herbert thomasfherbert@gmail.com - Thomas Goirand zigo@debian.org - Thomas Graf tgraf@noironetworks.com - Thomas Lacroix thomas.lacroix@citrix.com -+Tim Rozet trozet@redhat.com - Timo Puha timox.puha@intel.com - Timothy Redaelli tredaelli@redhat.com - Todd Deshane deshantm@gmail.com -diff --git a/lib/actions.c b/lib/actions.c -index 02141af30..41a742064 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -319,11 +319,7 @@ parse_NEXT(struct action_context *ctx) - } - } - -- if (pipeline == OVNACT_P_EGRESS && ctx->pp->pipeline == OVNACT_P_INGRESS) { -- lexer_error(ctx->lexer, -- "\"next\" action cannot advance from ingress to egress " -- "pipeline (use \"output\" action instead)"); -- } else if (table >= ctx->pp->n_tables) { -+ if (table >= ctx->pp->n_tables) { - lexer_error(ctx->lexer, - "\"next\" action cannot advance beyond table %d.", - ctx->pp->n_tables - 1); -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index efcc4b7fc..d39e259f6 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -373,6 +373,14 @@ - for new connections and reg0[1] = 1; next; for existing - connections. - -+
  • -+ reject ACLs translate into logical -+ flows with the -+ tcp_reset { output <-> inport; -+ next(pipeline=egress,table=5);} -+ action for TCP connections and icmp4/icmp6 action -+ for UDP connections. -+
  • -
  • - Other ACLs translate to drop; for new or untracked - connections and ct_commit(ct_label=1/1); for known -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index d3d481ab8..0082e2e8b 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -4717,11 +4717,11 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) - * unreachable packets. */ - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, - "nd || nd_rs || nd_ra || icmp4.type == 3 || " -- "icmp6.type == 1 || (tcp && tcp.flags == 20) || " -+ "icmp6.type == 1 || " - "(udp && udp.src == 546 && udp.dst == 547)", "next;"); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, - "nd || nd_rs || nd_ra || icmp4.type == 3 || " -- "icmp6.type == 1 || (tcp && tcp.flags == 20) ||" -+ "icmp6.type == 1 || " - "(udp && udp.src == 546 && udp.dst == 547)", "next;"); - - /* Ingress and Egress Pre-ACL Table (Priority 100). -@@ -4834,11 +4834,11 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, - /* Do not send ND packets to conntrack */ - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, - "nd || nd_rs || nd_ra || icmp4.type == 3 ||" -- "icmp6.type == 1 || (tcp && tcp.flags == 20)", -+ "icmp6.type == 1", - "next;"); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, - "nd || nd_rs || nd_ra || icmp4.type == 3 ||" -- "icmp6.type == 1 || (tcp && tcp.flags == 20)", -+ "icmp6.type == 1", - "next;"); - - /* Do not send service monitor packets to conntrack. */ -@@ -4984,7 +4984,8 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows, - ds_put_format(&actions, "reg0 = 0; " - "eth.dst <-> eth.src; ip4.dst <-> ip4.src; " - "tcp_reset { outport <-> inport; %s };", -- ingress ? "output;" : "next(pipeline=ingress,table=0);"); -+ ingress ? 
"next(pipeline=egress,table=5);" -+ : "next(pipeline=ingress,table=19);"); - ovn_lflow_add_with_hint(lflows, od, stage, - acl->priority + OVN_ACL_PRI_OFFSET + 10, - ds_cstr(&match), ds_cstr(&actions), stage_hint); -@@ -4998,7 +4999,8 @@ build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows, - ds_put_format(&actions, "reg0 = 0; " - "eth.dst <-> eth.src; ip6.dst <-> ip6.src; " - "tcp_reset { outport <-> inport; %s };", -- ingress ? "output;" : "next(pipeline=ingress,table=0);"); -+ ingress ? "next(pipeline=egress,table=5);" -+ : "next(pipeline=ingress,table=19);"); - ovn_lflow_add_with_hint(lflows, od, stage, - acl->priority + OVN_ACL_PRI_OFFSET + 10, - ds_cstr(&match), ds_cstr(&actions), stage_hint); -diff --git a/ovn-sb.xml b/ovn-sb.xml -index 5f8da534c..3aa7cd4da 100644 ---- a/ovn-sb.xml -+++ b/ovn-sb.xml -@@ -1112,10 +1112,12 @@ - pipeline as a subroutine. The default table is - just after the current one. If pipeline is specified, it - may be ingress or egress; the default -- pipeline is the one currently executing. Actions in the -- ingress pipeline may not use next to jump into the -- egress pipeline (use the output instead), but -- transitions in the opposite direction are allowed. -+ pipeline is the one currently executing. Actions in the -+ both ingress and egress pipeline can use next to jump -+ across the other pipeline. Actions in the ingress pipeline should -+ use next to jump into the specific table of egress -+ pipeline only if it is certain that the packets are local and not -+ tunnelled and wants to skip certain stages in the packet processing. - - -
    field = constant;
    -diff --git a/tests/automake.mk b/tests/automake.mk -index 215fb432b..ed530dd77 100644 ---- a/tests/automake.mk -+++ b/tests/automake.mk -@@ -205,7 +205,8 @@ tests_ovstest_LDADD = $(OVS_LIBDIR)/libopenvswitch.la lib/libovn.la - # Python tests. - CHECK_PYFILES = \ - tests/test-l7.py \ -- tests/uuidfilt.py -+ tests/uuidfilt.py \ -+ tests/test-tcp-rst.py - - EXTRA_DIST += $(CHECK_PYFILES) - PYCOV_CLEAN_FILES += $(CHECK_PYFILES:.py=.py,cover) .coverage -diff --git a/tests/ovn.at b/tests/ovn.at -index b78637044..6da975554 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -850,7 +850,11 @@ next(pipeline=ingress, table=11); - encodes as resubmit(,19) - - next(pipeline=egress); -- "next" action cannot advance from ingress to egress pipeline (use "output" action instead) -+ formats as next(pipeline=egress, table=11); -+ encodes as resubmit(,51) -+ -+next(pipeline=egress, table=5); -+ encodes as resubmit(,45) - - next(table=10); - formats as next(10); -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index bdb9768d2..3b11cf92b 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3719,60 +3719,86 @@ start_daemon ovn-controller - - ovn-nbctl ls-add sw0 - --ovn-nbctl lsp-add sw0 sw0-p1 --ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3 aef0::3" --ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3 aef0::3" -+ovn-nbctl lsp-add sw0 sw0-p1-rej -+ovn-nbctl lsp-set-addresses sw0-p1-rej "50:54:00:00:00:03 10.0.0.3 aef0::3" -+ovn-nbctl lsp-set-port-security sw0-p1-rej "50:54:00:00:00:03 10.0.0.3 aef0::3" - --ovn-nbctl lsp-add sw0 sw0-p2 --ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4 aef0::4" --ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4 aef0::4" -+ovn-nbctl lsp-add sw0 sw0-p2-rej -+ovn-nbctl lsp-set-addresses sw0-p2-rej "50:54:00:00:00:04 10.0.0.4 aef0::4" -+ovn-nbctl lsp-set-port-security sw0-p2-rej "50:54:00:00:00:04 10.0.0.4 aef0::4" -+ -+#ovn-nbctl --log acl-add sw0 from-lport 1000 
"inport == \"sw0-p1\" && tcp && tcp.dst == 80" reject -+#ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p2\" && ip6 && tcp && tcp.dst == 80" reject -+ -+# Create port group and ACLs for sw0 ports. -+ovn-nbctl pg-add pg0_drop sw0-p1-rej sw0-p2-rej -+ovn-nbctl acl-add pg0_drop from-lport 1001 "inport == @pg0_drop && ip" drop -+ovn-nbctl acl-add pg0_drop to-lport 1001 "outport == @pg0_drop && ip" drop -+ -+ovn-nbctl pg-add pg0 sw0-p1-rej sw0-p2-rej -+ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4" allow-related -+ovn-nbctl --log acl-add pg0 from-lport 1004 "inport == @pg0 && ip && tcp && tcp.dst == 80" reject - --ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p1\" && tcp && tcp.dst == 80" reject --ovn-nbctl --log acl-add sw0 from-lport 1000 "inport == \"sw0-p2\" && ip6 && tcp && tcp.dst == 80" reject -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 82" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 82" allow-related -+ovn-nbctl --log acl-add pg0 to-lport 1004 "inport == @pg0 && ip && tcp && tcp.dst == 84" reject - - OVN_POPULATE_ARP - ovn-nbctl --wait=hv sync - --ADD_NAMESPACES(sw0-p1) --ADD_VETH(sw0-p1, sw0-p1, br-int, "10.0.0.3/24", "50:54:00:00:00:03", \ -+ADD_NAMESPACES(sw0-p1-rej) -+ADD_VETH(sw0-p1-rej, sw0-p1-rej, br-int, "10.0.0.3/24", "50:54:00:00:00:03", \ - "10.0.0.1") - --ADD_NAMESPACES(sw0-p2) --ADD_VETH(sw0-p2, sw0-p2, br-int, "10.0.0.4/24", "50:54:00:00:00:04", \ -+ADD_NAMESPACES(sw0-p2-rej) -+ADD_VETH(sw0-p2-rej, sw0-p2-rej, br-int, "10.0.0.4/24", "50:54:00:00:00:04", \ - "10.0.0.1") - --NS_CHECK_EXEC([sw0-p1], [ip a a aef0::3/64 dev sw0-p1], [0]) --NS_CHECK_EXEC([sw0-p2], [ip a a aef0::4/64 dev sw0-p2], [0]) -+NS_CHECK_EXEC([sw0-p1-rej], [ip a a aef0::3/64 dev sw0-p1-rej], 
[0]) -+NS_CHECK_EXEC([sw0-p2-rej], [ip a a aef0::4/64 dev sw0-p2-rej], [0]) - --# Capture packets in sw0-p1. --NS_CHECK_EXEC([sw0-p1], [tcpdump -n -c 2 -i sw0-p1 tcp port 80 > sw0-p1-ip4.pcap &], [0]) -+# Capture packets in sw0-p1-rej. -+NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 2 -i sw0-p1-rej tcp port 80 > sw0-p1-rej-ip4.pcap &], [0]) - sleep 1 - --NS_CHECK_EXEC([sw0-p1], [nc 10.0.0.4 80], [1], [], -+NS_CHECK_EXEC([sw0-p1-rej], [nc 10.0.0.4 80], [1], [], - [dnl - Ncat: Connection refused. - ]) - - OVS_WAIT_UNTIL([ -- total=`cat sw0-p1-ip4.pcap | wc -l` -+ total=`cat sw0-p1-rej-ip4.pcap | wc -l` - echo "total = $total" - test "${total}" = "2" - ]) - -+# Now send traffic to port 84 -+NS_CHECK_EXEC([sw0-p1-rej], [nc 10.0.0.4 84], [1], [], -+[dnl -+Ncat: Connection refused. -+]) -+ -+AT_CHECK([ -+ n_pkt=$(ovs-ofctl dump-flows br-int table=44 | grep -v n_packets=0 | \ -+grep controller | grep tp_dst=84 -c) -+ test $n_pkt -eq 1 -+]) -+ - # Without this sleep, test case fails intermittently. - sleep 3 - --NS_CHECK_EXEC([sw0-p2], [tcpdump -n -c 2 -i sw0-p2 tcp port 80 > sw0-p2-ip6.pcap &], [0]) -+NS_CHECK_EXEC([sw0-p2-rej], [tcpdump -n -c 2 -i sw0-p2-rej tcp port 80 > sw0-p2-rej-ip6.pcap &], [0]) - - sleep 1 - --NS_CHECK_EXEC([sw0-p2], [nc -6 aef0::3 80], [1], [], -+NS_CHECK_EXEC([sw0-p2-rej], [nc -6 aef0::3 80], [1], [], - [dnl - Ncat: Connection refused. - ]) - - OVS_WAIT_UNTIL([ -- total=`cat sw0-p2-ip6.pcap | wc -l` -+ total=`cat sw0-p2-rej-ip6.pcap | wc -l` - echo "total = $total" - test "${total}" = "2" - ]) -@@ -3936,3 +3962,105 @@ as - OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d - /.*terminating with signal 15.*/d"]) - AT_CLEANUP -+ -+# Tests that when an established connection sends TCP reset, -+# the conntrack entry is not in established state. 
-+AT_SETUP([ovn -- conntrack TCP reset]) -+AT_KEYWORDS([conntrack]) -+ovn_start -+ -+OVS_TRAFFIC_VSWITCHD_START() -+ADD_BR([br-int]) -+ -+# Set external-ids in br-int needed for ovn-controller -+ovs-vsctl \ -+ -- set Open_vSwitch . external-ids:system-id=hv1 \ -+ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ -+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ -+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ -+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true -+ -+# Start ovn-controller -+start_daemon ovn-controller -+ -+ovn-nbctl ls-add sw0 -+ -+ovn-nbctl lsp-add sw0 rst-p1 -+ovn-nbctl lsp-set-addresses rst-p1 "50:54:00:00:00:03" -+ovn-nbctl lsp-set-port-security rst-p1 "50:54:00:00:00:03" -+ -+ovn-nbctl lsp-add sw0 rst-p2 -+ovn-nbctl lsp-set-addresses rst-p2 "50:54:00:00:00:04 10.0.0.4" -+ovn-nbctl lsp-set-port-security rst-p2 "50:54:00:00:00:04 10.0.0.4" -+ -+# Create port group and ACLs for sw0 ports. -+ovn-nbctl pg-add pg0_drop rst-p1 rst-p2 -+ovn-nbctl acl-add pg0_drop from-lport 1001 "inport == @pg0_drop && ip" drop -+ovn-nbctl acl-add pg0_drop to-lport 1001 "outport == @pg0_drop && ip" drop -+ -+ovn-nbctl pg-add pg0 rst-p1 rst-p2 -+ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 80" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 80" allow-related -+ -+# Create a logical router and attach to logical switch. 
-+ovn-nbctl lr-add lr0 -+ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 -+ovn-nbctl lsp-add sw0 sw0-lr0 -+ovn-nbctl lsp-set-type sw0-lr0 router -+ovn-nbctl lsp-set-addresses sw0-lr0 router -+ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 -+ -+ovn-nbctl lb-add lb1 10.0.0.10:80 10.0.0.3:80 -+ovn-nbctl --wait=sb ls-lb-add sw0 lb1 -+ovn-nbctl --wait=sb lr-lb-add lr0 lb1 -+ -+OVN_POPULATE_ARP -+ovn-nbctl --wait=hv sync -+ -+ADD_NAMESPACES(rst-p1) -+ADD_VETH(rst-p1, rst-p1, br-int, "10.0.0.3/24", "50:54:00:00:00:03", \ -+ "10.0.0.1") -+ -+ADD_NAMESPACES(rst-p2) -+ADD_VETH(rst-p2, rst-p2, br-int, "10.0.0.4/24", "50:54:00:00:00:04", \ -+ "10.0.0.1") -+ -+OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up rst-p1) = xup]) -+OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up rst-p2) = xup]) -+ -+# Start webservers in 'rst-p1'. -+OVS_START_L7([rst-p1], [http]) -+ -+NS_CHECK_EXEC([rst-p2], [$PYTHON $srcdir/test-tcp-rst.py --dst-port 80 --dst-ip 10.0.0.10]) -+ -+# When tcp reset is sent, conntrack entry should be in the state - CLOSED or CLOSING. -+# But there is a bug where tcp reset packet was not sent to the conntrack. -+# This test case checks that the tcp reset packet is sent to conntrack -+# and the state is not in established state. 
-+AT_CHECK([ -+ ct_est_count=$(ovs-appctl dpctl/dump-conntrack | grep 10.0.0.10 | grep state=ESTABLISHED -c) -+ test $ct_est_count -eq 0 -+ -+ ct_est_count=$(ovs-appctl dpctl/dump-conntrack | grep 10.0.0.10 | grep state=CLOS -c) -+ test $ct_est_count -eq 1 -+]) -+ -+OVS_APP_EXIT_AND_WAIT([ovn-controller]) -+ -+as ovn-sb -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+ -+as ovn-nb -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+ -+as northd -+OVS_APP_EXIT_AND_WAIT([ovn-northd]) -+ -+as -+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d -+/connection dropped.*/d -+/Service monitor not found.*/d"]) -+ -+AT_CLEANUP -diff --git a/tests/test-tcp-rst.py b/tests/test-tcp-rst.py -new file mode 100644 -index 000000000..6f96c5706 ---- /dev/null -+++ b/tests/test-tcp-rst.py -@@ -0,0 +1,37 @@ -+#!/usr/bin/env python3 -+# Copyright (c) 2020 Red Hat, Inc. -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at: -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+ -+# Simple python script which connects to tcp server and then -+# resets the connection. 
-+import argparse -+import socket -+import sys -+import struct -+import time -+ -+parser = argparse.ArgumentParser(description='') -+parser.add_argument("--src-port", type=int, default=11337, help="source port to use") -+parser.add_argument("--dst-port", type=int, help="dst port to use") -+parser.add_argument("--dst-ip", help="server ip to use") -+args = parser.parse_args() -+sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -+server_address = (args.dst_ip, args.dst_port) -+sock.bind(('0.0.0.0', args.src_port)) -+sock.connect(server_address) -+l_onoff = 1 -+l_linger = 0 -+time.sleep(1) -+sock.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, struct.pack('ii', l_onoff, l_linger)) -+sock.close() --- -2.25.1 - diff --git a/SOURCES/0001-Fix-ovn-controller-crash-when-a-lport-of-type-virtua.patch b/SOURCES/0001-Fix-ovn-controller-crash-when-a-lport-of-type-virtua.patch new file mode 100644 index 0000000..d73b70f --- /dev/null +++ b/SOURCES/0001-Fix-ovn-controller-crash-when-a-lport-of-type-virtua.patch @@ -0,0 +1,86 @@ +From e1cd90a8ac7ede76dabc3714358c32076f9557e7 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Wed, 26 Aug 2020 16:48:26 +0530 +Subject: [PATCH] Fix ovn-controller crash when a lport of type 'virtual' is + deleted. + +The below bt is seen when a lport of type 'virtual' is deleted. 
+ +(gdb) bt +0x00001470c0708655 in __strlen_avx2 () from /lib64/libc.so.6 +0x0000563340037449 in hash_string (basis=0, s=s@entry=0x0) at lib/hash.h:342 +hash_name (name=name@entry=0x0) at lib/shash.c:28 +0x0000563340037a76 in shash_find (sh=0x5633407bb260, name=0x0) at lib/shash.c:231 +0x0000563340037b7d in shash_find_data (sh=, name=) at lib/shash.c:245 +0x000056333ff71151 in local_binding_find (name=, local_bindings=) at controller/binding.h:108 +get_lbinding_for_lport (b_ctx_out=0x7fff616745b0, lport_type=, pb=0x56334314d630) at controller/binding.c:1960 +handle_deleted_vif_lport (b_ctx_in=0x7fff61674600, b_ctx_in=0x7fff61674600, b_ctx_out=0x7fff616745b0, lport_type=, pb=0x56334314d630) at controller/binding.c:1979 +binding_handle_port_binding_changes (b_ctx_in=b_ctx_in@entry=0x7fff61674600, b_ctx_out=b_ctx_out@entry=0x7fff616745b0) at controller/binding.c:2087 +0x000056333ff8e208 in runtime_data_sb_port_binding_handler (node=0x7fff616759f0, data=0x5633407bb240) at controller/ovn-controller.c:1325 +0x000056333ffa6de3 in engine_compute (recompute_allowed=, node=) at lib/inc-proc-eng.c:306 +... +... 
+ +Fixes: 354bdba51ab("ovn-controller: I-P for SB port binding and OVS interface in runtime_data.") +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique + +(cherry-picked from master commit 51fea73f0e09a1e670b8b7ca52819963bfa29c7e) + +(cherry-picked from upstream branch-20.06 commit edc8b8ffdfd4d0250ea8650b1501e49fb4a82b6f) + +Change-Id: I926ee596800a5cc105e481845bd328acda578dd2 +--- + controller/binding.c | 12 ++++++++---- + tests/ovn.at | 11 +++++++++++ + 2 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/controller/binding.c b/controller/binding.c +index 880fbb13b..3c102dc7f 100644 +--- a/controller/binding.c ++++ b/controller/binding.c +@@ -1957,11 +1957,15 @@ get_lbinding_for_lport(const struct sbrec_port_binding *pb, + struct local_binding *parent_lbinding = NULL; + + if (lport_type == LP_VIRTUAL) { +- parent_lbinding = local_binding_find(b_ctx_out->local_bindings, +- pb->virtual_parent); ++ if (pb->virtual_parent) { ++ parent_lbinding = local_binding_find(b_ctx_out->local_bindings, ++ pb->virtual_parent); ++ } + } else { +- parent_lbinding = local_binding_find(b_ctx_out->local_bindings, +- pb->parent_port); ++ if (pb->parent_port) { ++ parent_lbinding = local_binding_find(b_ctx_out->local_bindings, ++ pb->parent_port); ++ } + } + + return parent_lbinding +diff --git a/tests/ovn.at b/tests/ovn.at +index 0d99adf3f..1216bc50f 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -16115,6 +16115,17 @@ ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 + ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 + + OVN_POPULATE_ARP ++ ++# Delete sw0-vir and add again. 
++ovn-nbctl lsp-del sw0-vir ++ ++ovn-nbctl lsp-add sw0 sw0-vir ++ovn-nbctl lsp-set-addresses sw0-vir "50:54:00:00:00:10 10.0.0.10" ++ovn-nbctl lsp-set-port-security sw0-vir "50:54:00:00:00:10 10.0.0.10" ++ovn-nbctl lsp-set-type sw0-vir virtual ++ovn-nbctl set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 ++ovn-nbctl set logical_switch_port sw0-vir options:virtual-parents=sw0-p1,sw0-p2,sw0-p3 ++ + ovn-nbctl --wait=hv sync + + # Check that logical flows are added for sw0-vir in lsp_in_arp_rsp pipeline +-- +2.26.2 + diff --git a/SOURCES/0001-Fix-ovn-controller-generated-packets-from-getting-dr.patch b/SOURCES/0001-Fix-ovn-controller-generated-packets-from-getting-dr.patch deleted file mode 100644 index 1aadd22..0000000 --- a/SOURCES/0001-Fix-ovn-controller-generated-packets-from-getting-dr.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 14f9bf6ba4bfa459f2e924dbf273a6337aab4107 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Sun, 14 Jun 2020 18:40:07 +0530 -Subject: [PATCH 1/3] Fix ovn-controller generated packets from getting dropped - for reject ACL action. - -TCP reset/ICMP packet generated by ovn-controller for the ACL reject action -gets dropped by ovs-vswithd with the below messages in ovs-vswitchd log -even though ovn-controller sets the in_port as OFPP_CONTROLLER. - ----- -ofproto_dpif_upcall(handler1)|INFO|received packet on unassociated datapath port 4294967295 -ofproto_dpif_upcall(revalidator37)|WARN|Failed to acquire udpif_key corresponding to -unexpected flow (Invalid argument): ufid:0daac824-bda7-44d8-ad38-cdd9c5f0fc97 ----- - -ovs-vswitchd drops the packet because the in_port is 0. - -The below OF flow sets the in_port to 0 if 'MLF_ALLOW_LOOPBACK_BIT' is set in the REG0 -in table 64. - -priority=100,reg10=0x1/0x1,reg15=0x2,metadata=0x2 actions=push:NXM_OF_IN_PORT[],load:0->NXM_OF_IN_PORT[],resubmit(,65),pop:NXM_OF_IN_PORT[] - -This patch fixes this issue by setting the in_port to OFPP_NONE so that ovs-vswitchd -doesn't drop the packet. 
- -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1832176 -Acked-by: Mark Michelson -Signed-off-by: Numan Siddique - -(cherry-picked from upstream master commit cfa5478211318b686ad0981e7b0620f96edd7168) - -Change-Id: Ia8e134013c30a6865322083c8054fa45b57c9353 ---- - controller/physical.c | 18 ++++++++++++------ - tests/system-ovn.at | 32 ++++++++++++++++++++++++++++++++ - 2 files changed, 44 insertions(+), 6 deletions(-) - -diff --git a/controller/physical.c b/controller/physical.c -index 144aeb7bd..3c5bbe027 100644 ---- a/controller/physical.c -+++ b/controller/physical.c -@@ -765,12 +765,18 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key, - * - or if the destination is a nested container - * - or if "nested_container" flag is set and the destination is the - * parent port, -- * temporarily set the in_port to zero, resubmit to -+ * temporarily set the in_port to OFPP_NONE, resubmit to - * table 65 for logical-to-physical translation, then restore - * the port number. - * - * If 'parent_port_key' is set, then the 'port_key' represents a nested -- * container. */ -+ * container. -+ * -+ * Note:We can set in_port to 0 too. But if recirculation happens -+ * later (eg. clone action to enter peer pipeline and a subsequent -+ * ct action), ovs-vswitchd will drop the packet if the frozen metadata -+ * in_port is 0. -+ * */ - - bool nested_container = parent_port_key ? 
true: false; - match_init_catchall(&match); -@@ -783,7 +789,7 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key, - } - - put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p)); -- put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p); -+ put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16, ofpacts_p); - put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p); - put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p)); - ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0, -@@ -792,8 +798,8 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key, - if (nested_container) { - /* It's a nested container and when the packet from the nested - * container is to be sent to the parent port, "nested_container" -- * flag will be set. We need to temporarily set the in_port to zero -- * as mentioned in the comment above. -+ * flag will be set. We need to temporarily set the in_port to -+ * OFPP_NONE as mentioned in the comment above. - * - * If a parent port has multiple child ports, then this if condition - * will be hit multiple times, but we want to add only one flow. -@@ -814,7 +820,7 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key, - MLF_NESTED_CONTAINER, MLF_NESTED_CONTAINER); - - put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p)); -- put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p); -+ put_load(ofp_to_u16(OFPP_NONE), MFF_IN_PORT, 0, 16, ofpacts_p); - put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p); - put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p)); - ofctrl_check_and_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0, -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 9dfe6a4ad..52f05f07e 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3927,6 +3927,24 @@ ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0. 
- ovn-nbctl --log acl-add pg0 to-lport 1004 "inport == @pg0 && ip && tcp && tcp.dst == 84" reject - ovn-nbctl --log acl-add pg0 to-lport 1004 "inport == @pg0 && ip && udp && udp.dst == 94" reject - -+ovn-nbctl ls-add sw1 -+ovn-nbctl lsp-add sw1 sw1-p1-rej -+ovn-nbctl lsp-set-addresses sw1-p1-rej "40:54:00:00:00:03 20.0.0.3" -+ovn-nbctl lsp-set-port-security sw1-p1-rej "40:54:00:00:00:03 20.0.0.3" -+ -+ovn-nbctl lr-add lr0 -+ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 -+ovn-nbctl lsp-add sw0 sw0-lr0 -+ovn-nbctl lsp-set-type sw0-lr0 router -+ovn-nbctl lsp-set-addresses sw0-lr0 router -+ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 -+ -+ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 -+ovn-nbctl lsp-add sw1 sw1-lr0 -+ovn-nbctl lsp-set-type sw1-lr0 router -+ovn-nbctl lsp-set-addresses sw1-lr0 router -+ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 -+ - OVN_POPULATE_ARP - ovn-nbctl --wait=hv sync - -@@ -3941,6 +3959,10 @@ ADD_VETH(sw0-p2-rej, sw0-p2-rej, br-int, "10.0.0.4/24", "50:54:00:00:00:04", \ - NS_CHECK_EXEC([sw0-p1-rej], [ip a a aef0::3/64 dev sw0-p1-rej], [0]) - NS_CHECK_EXEC([sw0-p2-rej], [ip a a aef0::4/64 dev sw0-p2-rej], [0]) - -+ADD_NAMESPACES(sw1-p1-rej) -+ADD_VETH(sw1-p1-rej, sw1-p1-rej, br-int, "20.0.0.3/24", "40:54:00:00:00:03", \ -+ "20.0.0.1") -+ - sleep 1 - - # Capture packets in sw0-p1-rej. -@@ -3993,6 +4015,16 @@ OVS_WAIT_UNTIL([ - test "${total}" = "2" - ]) - -+ovn-nbctl acl-add sw1 from-lport 1004 "ip" allow-related -+ovn-nbctl acl-add sw1 to-lport 1004 "ip" allow-related -+ovn-nbctl --log acl-add pg0 to-lport 1004 "outport == @pg0 && ip && tcp && tcp.dst == 84" reject -+ -+OVS_WAIT_UNTIL([ -+ ip netns exec sw1-p1-rej nc 10.0.0.4 84 2> r -+ res=$(cat r) -+ test "$res" = "Ncat: Connection refused." -+]) -+ - # Now test for IPv4 UDP. 
- NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej udp port 90 > sw0-p1-rej-udp.pcap &], [0]) - NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej icmp > sw0-p1-rej-icmp.pcap &], [0]) --- -2.26.2 - diff --git a/SOURCES/0001-Fix-the-data-type-for-DHCP-option-tftp_server-66.patch b/SOURCES/0001-Fix-the-data-type-for-DHCP-option-tftp_server-66.patch new file mode 100644 index 0000000..36246f3 --- /dev/null +++ b/SOURCES/0001-Fix-the-data-type-for-DHCP-option-tftp_server-66.patch @@ -0,0 +1,279 @@ +From fe0be84c42c1b304bfbe49e59aa11eea100e16b1 Mon Sep 17 00:00:00 2001 +From: Dhathri Purohith +Date: Thu, 11 Jun 2020 13:44:40 -0700 +Subject: [PATCH 01/22] Fix the data type for DHCP option tftp_server (66) + +Currently, DHCP option is of type ipv4. But, according to RFC 2132, +option 66 can be a hostname i.e, we should also accept string type. +In order to be backward compatible, a new type called "host_id" is +introduced, which accepts both ipv4 address and string. Type for DHCP +option 66 is changed to "host_id" instead of ipv4. +OVN northd code that updates the OVN southbound database is enhanced to +consider the change in the type and code for DHCP option, so that the +change in datatype is reflected. 
+ +Signed-off-by: Dhathri Purohith +Signed-off-by: Ankur Sharma +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit b06319993debbeb6d116901afa511d627543c10d) + +Change-Id: I45b4d783bfc8849a69d7c8c8584429c2740e668c +--- + lib/actions.c | 12 ++++++++ + lib/ovn-l7.h | 2 +- + northd/ovn-northd.c | 7 ++++- + ovn-nb.xml | 18 ++++++++---- + ovn-sb.ovsschema | 7 +++-- + ovn-sb.xml | 13 +++++++++ + tests/ovn.at | 68 +++++++++++++++++++++++++++++++++++++++++++++ + tests/test-ovn.c | 2 +- + 8 files changed, 117 insertions(+), 12 deletions(-) + +diff --git a/lib/actions.c b/lib/actions.c +index 6d0d687b3..616c93e8a 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -2083,6 +2083,10 @@ parse_gen_opt(struct action_context *ctx, struct ovnact_gen_option *o, + return; + } + ++ if (!strcmp(o->option->type, "host_id")) { ++ return; ++ } ++ + if (!strcmp(o->option->type, "str")) { + if (o->value.type != EXPR_C_STRING) { + lexer_error(ctx->lexer, "%s option %s requires string value.", +@@ -2410,6 +2414,14 @@ encode_put_dhcpv4_option(const struct ovnact_gen_option *o, + } else if (!strcmp(o->option->type, "str")) { + opt_header[1] = strlen(c->string); + ofpbuf_put(ofpacts, c->string, opt_header[1]); ++ } else if (!strcmp(o->option->type, "host_id")) { ++ if (o->value.type == EXPR_C_STRING) { ++ opt_header[1] = strlen(c->string); ++ ofpbuf_put(ofpacts, c->string, opt_header[1]); ++ } else { ++ opt_header[1] = sizeof(ovs_be32); ++ ofpbuf_put(ofpacts, &c->value.ipv4, sizeof(ovs_be32)); ++ } + } + } + +diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h +index d5c6feaeb..22a2153de 100644 +--- a/lib/ovn-l7.h ++++ b/lib/ovn-l7.h +@@ -57,7 +57,7 @@ struct gen_opts_map { + #define DHCP_OPT_NIS_SERVER DHCP_OPTION("nis_server", 41, "ipv4") + #define DHCP_OPT_NTP_SERVER DHCP_OPTION("ntp_server", 42, "ipv4") + #define DHCP_OPT_SERVER_ID DHCP_OPTION("server_id", 54, "ipv4") +-#define DHCP_OPT_TFTP_SERVER DHCP_OPTION("tftp_server", 66, "ipv4") ++#define DHCP_OPT_TFTP_SERVER 
DHCP_OPTION("tftp_server", 66, "host_id") + + #define DHCP_OPT_CLASSLESS_STATIC_ROUTE \ + DHCP_OPTION("classless_static_route", 121, "static_routes") +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 6858bf8fd..14be87435 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -11785,7 +11785,12 @@ check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx) + struct gen_opts_map *dhcp_opt = + dhcp_opts_find(&dhcp_opts_to_add, opt_row->name); + if (dhcp_opt) { +- hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node); ++ if (!strcmp(dhcp_opt->type, opt_row->type) && ++ dhcp_opt->code == opt_row->code) { ++ hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node); ++ } else { ++ sbrec_dhcp_options_delete(opt_row); ++ } + } else { + sbrec_dhcp_options_delete(opt_row); + } +diff --git a/ovn-nb.xml b/ovn-nb.xml +index e947c440d..8d04d3d3b 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -2830,12 +2830,6 @@ +

    + + +- +-

    +- The DHCPv4 option code for this option is 66. +-

    +-
    +- + +

    + The DHCPv4 option code for this option is 121. +@@ -2984,6 +2978,18 @@ +

    +
    + ++ ++ ++

    ++ These options accept either an IPv4 address or a string value. ++

    ++ ++ ++

    ++ The DHCPv4 option code for this option is 66. ++

    ++
    ++
    + + + +diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema +index c196ddaf3..2ec729b77 100644 +--- a/ovn-sb.ovsschema ++++ b/ovn-sb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Southbound", +- "version": "2.8.0", +- "cksum": "1643994484 21853", ++ "version": "2.8.1", ++ "cksum": "236203406 21905", + "tables": { + "SB_Global": { + "columns": { +@@ -217,7 +217,8 @@ + "type": {"key": { + "type": "string", + "enum": ["set", ["bool", "uint8", "uint16", "uint32", +- "ipv4", "static_routes", "str"]]}}}}, ++ "ipv4", "static_routes", "str", ++ "host_id"]]}}}}, + "isRoot": true}, + "DHCPv6_Options": { + "columns": { +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 0641e4942..2edafd48f 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -3274,6 +3274,19 @@ tcp.flags = RST; + Example. "name=host_name", "code=12", "type=str". +

    + ++ ++
    value: host_id
    ++
    ++

    ++ This indicates that the value of the DHCP option is a host_id. ++ It can either be a host_name or an IP address. ++

    ++ ++

    ++ Example. "name=tftp_server", "code=66", "type=host_id". ++

    ++
    ++ + + +
  • +diff --git a/tests/ovn.at b/tests/ovn.at +index f6adbb7a3..4e98790af 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1249,6 +1249,12 @@ reg2[5] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.254.0,m + reg0[15] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.255.0,mtu=1400,ip_forward_enable=1,default_ttl=121,dns_server={8.8.8.8,7.7.7.7},classless_static_route={30.0.0.0/24,10.0.0.4,40.0.0.0/16,10.0.0.6,0.0.0.0/0,10.0.0.1},ethernet_encap=1,router_discovery=0,tftp_server_address={10.0.0.4,10.0.0.5},arp_cache_timeout=10,tcp_keepalive_interval=10); + formats as reg0[15] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, netmask = 255.255.255.0, mtu = 1400, ip_forward_enable = 1, default_ttl = 121, dns_server = {8.8.8.8, 7.7.7.7}, classless_static_route = {30.0.0.0/24, 10.0.0.4, 40.0.0.0/16, 10.0.0.6, 0.0.0.0/0, 10.0.0.1}, ethernet_encap = 1, router_discovery = 0, tftp_server_address = {10.0.0.4, 10.0.0.5}, arp_cache_timeout = 10, tcp_keepalive_interval = 10); + encodes as controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.6f.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.ff.00.1a.02.05.78.13.01.01.17.01.79.06.08.08.08.08.08.07.07.07.07.79.14.18.1e.00.00.0a.00.00.04.10.28.00.0a.00.00.06.00.0a.00.00.01.24.01.01.1f.01.00.96.08.0a.00.00.04.0a.00.00.05.23.04.00.00.00.0a.26.04.00.00.00.0a,pause) ++reg0[15] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.255.0,mtu=1400,ip_forward_enable=1,default_ttl=121,dns_server={8.8.8.8,7.7.7.7},classless_static_route={30.0.0.0/24,10.0.0.4,40.0.0.0/16,10.0.0.6,0.0.0.0/0,10.0.0.1},ethernet_encap=1,router_discovery=0,tftp_server=10.0.0.10); ++ formats as reg0[15] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, netmask = 255.255.255.0, mtu = 1400, ip_forward_enable = 1, default_ttl = 121, dns_server = {8.8.8.8, 7.7.7.7}, classless_static_route = {30.0.0.0/24, 10.0.0.4, 40.0.0.0/16, 10.0.0.6, 0.0.0.0/0, 10.0.0.1}, ethernet_encap = 1, router_discovery = 0, 
tftp_server = 10.0.0.10); ++ encodes as controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.6f.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.ff.00.1a.02.05.78.13.01.01.17.01.79.06.08.08.08.08.08.07.07.07.07.79.14.18.1e.00.00.0a.00.00.04.10.28.00.0a.00.00.06.00.0a.00.00.01.24.01.01.1f.01.00.42.04.0a.00.00.0a,pause) ++reg0[15] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.255.0,mtu=1400,ip_forward_enable=1,default_ttl=121,dns_server={8.8.8.8,7.7.7.7},classless_static_route={30.0.0.0/24,10.0.0.4,40.0.0.0/16,10.0.0.6,0.0.0.0/0,10.0.0.1},ethernet_encap=1,router_discovery=0,tftp_server="tftp_server_test"); ++ formats as reg0[15] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, netmask = 255.255.255.0, mtu = 1400, ip_forward_enable = 1, default_ttl = 121, dns_server = {8.8.8.8, 7.7.7.7}, classless_static_route = {30.0.0.0/24, 10.0.0.4, 40.0.0.0/16, 10.0.0.6, 0.0.0.0/0, 10.0.0.1}, ethernet_encap = 1, router_discovery = 0, tftp_server = "tftp_server_test"); ++ encodes as controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.6f.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.ff.00.1a.02.05.78.13.01.01.17.01.79.06.08.08.08.08.08.07.07.07.07.79.14.18.1e.00.00.0a.00.00.04.10.28.00.0a.00.00.06.00.0a.00.00.01.24.01.01.1f.01.00.42.10.74.66.74.70.5f.73.65.72.76.65.72.5f.74.65.73.74,pause) + + reg1[0..1] = put_dhcp_opts(offerip = 1.2.3.4, router = 10.0.0.1); + Cannot use 2-bit field reg1[0..1] where 1-bit field is required. 
+@@ -5624,6 +5630,68 @@ AT_CHECK([cat 1.packets | cut -c -48], [0], [expout]) + cat 1.expected | cut -c 53- > expout + AT_CHECK([cat 1.packets | cut -c 53-], [0], [expout]) + ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ ++# Set tftp server option (IPv4 address) for ls1 ++echo "------ Set tftp server (IPv4 address) --------" ++ovn-nbctl dhcp-options-set-options $d1 server_id=10.0.0.1 \ ++server_mac=ff:10:00:00:00:01 lease_time=3600 router=10.0.0.1 \ ++tftp_server=10.10.10.10 ++echo "----------------------------------------------" ++ ++# Send DHCPREQUEST in the SELECTING/INIT-REBOOT state with the offered IP ++# address in the Requested IP Address option. ++offer_ip=`ip_to_hex 10 0 0 6` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=`ip_to_hex 0 0 0 0` ++request_ip=$offer_ip ++expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a00000142040a0a0a0a ++test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts ++ ++# NXT_RESUMEs should be 10. ++OVS_WAIT_UNTIL([test 10 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++cat 2.expected | cut -c -48 > expout ++AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) ++# Skipping the IPv4 checksum. 
++cat 2.expected | cut -c 53- > expout ++AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) ++ ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ ++# Set tftp server option (Hostname) for ls1 ++echo "------ Set tftp server (hostname) --------" ++ovn-nbctl dhcp-options-set-options $d1 server_id=10.0.0.1 \ ++server_mac=ff:10:00:00:00:01 lease_time=3600 router=10.0.0.1 \ ++tftp_server=\"test_tftp_server\" ++echo "------------------------------------------" ++ ++# Send DHCPREQUEST in the SELECTING/INIT-REBOOT state with the offered IP ++# address in the Requested IP Address option. ++offer_ip=`ip_to_hex 10 0 0 6` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=`ip_to_hex 0 0 0 0` ++request_ip=$offer_ip ++expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a0000014210746573745f746674705f736572766572 ++test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts ++ ++# NXT_RESUMEs should be 11. ++OVS_WAIT_UNTIL([test 11 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++cat 2.expected | cut -c -48 > expout ++AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) ++# Skipping the IPv4 checksum. 
++cat 2.expected | cut -c 53- > expout ++AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) ++ + OVN_CLEANUP([hv1]) + + AT_CLEANUP +diff --git a/tests/test-ovn.c b/tests/test-ovn.c +index 8f1bb7e01..29d343b60 100644 +--- a/tests/test-ovn.c ++++ b/tests/test-ovn.c +@@ -175,7 +175,7 @@ create_gen_opts(struct hmap *dhcp_opts, struct hmap *dhcpv6_opts, + dhcp_opt_add(dhcp_opts, "nis_server", 41, "ipv4"); + dhcp_opt_add(dhcp_opts, "ntp_server", 42, "ipv4"); + dhcp_opt_add(dhcp_opts, "server_id", 54, "ipv4"); +- dhcp_opt_add(dhcp_opts, "tftp_server", 66, "ipv4"); ++ dhcp_opt_add(dhcp_opts, "tftp_server", 66, "host_id"); + dhcp_opt_add(dhcp_opts, "classless_static_route", 121, "static_routes"); + dhcp_opt_add(dhcp_opts, "ip_forward_enable", 19, "bool"); + dhcp_opt_add(dhcp_opts, "router_discovery", 31, "bool"); +-- +2.26.2 + diff --git a/SOURCES/0001-I-P-engine-Provide-the-option-to-store-client-data-i.patch b/SOURCES/0001-I-P-engine-Provide-the-option-to-store-client-data-i.patch new file mode 100644 index 0000000..a4614fa --- /dev/null +++ b/SOURCES/0001-I-P-engine-Provide-the-option-to-store-client-data-i.patch @@ -0,0 +1,33 @@ +From 22ae217041eee8b7d655cc489797ac88432495d6 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Mon, 24 Aug 2020 16:42:48 +0530 +Subject: [PATCH 1/2] I-P engine: Provide the option to store client data in + engine ctx. + +There can be some client specific data which could change from one engine run +to another. Adding a 'void *' data in 'struct engine_ctx' will be useful. +One such usecase is to provide a config option in ovn-controller to turn on or +off logical flow expr caching. And this config knob can be stored in the engine_ctx. +An upcoming patch will make use of this. 
+ +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique +--- + lib/inc-proc-eng.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h +index e25bcb29c..80de75029 100644 +--- a/lib/inc-proc-eng.h ++++ b/lib/inc-proc-eng.h +@@ -66,6 +66,7 @@ + struct engine_context { + struct ovsdb_idl_txn *ovs_idl_txn; + struct ovsdb_idl_txn *ovnsb_idl_txn; ++ void *client_ctx; + }; + + /* Arguments to be passed to the engine at engine_init(). */ +-- +2.26.2 + diff --git a/SOURCES/0001-IPv6-PD-assume-status-to-be-Success-if-not-present.patch b/SOURCES/0001-IPv6-PD-assume-status-to-be-Success-if-not-present.patch deleted file mode 100644 index 2ef8f34..0000000 --- a/SOURCES/0001-IPv6-PD-assume-status-to-be-Success-if-not-present.patch +++ /dev/null @@ -1,36 +0,0 @@ -From f3f604a41e44a17b1953ebd3d2162c1fc046f49f Mon Sep 17 00:00:00 2001 -Message-Id: -From: Lorenzo Bianconi -Date: Sat, 25 Apr 2020 12:18:12 +0200 -Subject: [PATCH] IPv6 PD: assume status to be Success if not present - -According to the RFC3315 (section 22.13. Status Code Option), -if status code option is not present in the delegation server -reply, it will be assumed to be Success. 
In this particular case, -do not stop IPv6 PD state machine - -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - controller/pinctrl.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -835,12 +835,15 @@ pinctrl_parse_dhcpv6_reply(struct dp_pac - plife_time = ntohl(ia_hdr->plife_time); - vlife_time = ntohl(ia_hdr->vlife_time); - memcpy(&ipv6, &ia_hdr->ipv6, sizeof (struct in6_addr)); -+ status = true; - } - if (ntohs(in_opt->code) == DHCPV6_OPT_STATUS_CODE) { - struct dhcpv6_opt_status *status_hdr; - - status_hdr = (struct dhcpv6_opt_status *)in_opt; -- status = ntohs(status_hdr->status_code) == 0; -+ if (ntohs(status_hdr->status_code)) { -+ status = false; -+ } - } - size += sizeof *in_opt + ntohs(in_opt->len); - in_opt = (struct dhcpv6_opt_header *)(in_dhcpv6_data + size); diff --git a/SOURCES/0001-IPv6-PD-time-parameter-checks.patch b/SOURCES/0001-IPv6-PD-time-parameter-checks.patch deleted file mode 100644 index 1221fb8..0000000 --- a/SOURCES/0001-IPv6-PD-time-parameter-checks.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 942f7b2b9e3acfc7b1d6ea5c48fc22171b14549a Mon Sep 17 00:00:00 2001 -Message-Id: <942f7b2b9e3acfc7b1d6ea5c48fc22171b14549a.1588586761.git.lorenzo.bianconi@redhat.com> -From: Lorenzo Bianconi -Date: Thu, 23 Apr 2020 18:25:20 +0200 -Subject: [PATCH] IPv6 PD: time parameter checks - -RFC3633 imposes the following constraints for IPv6 pd time parameters: - -Identity Association for Prefix Delegation Option: --------------------------------------------------- -t1 must not be greater than t2 if both of them are greater than 0 - -IA_PD Prefix option: --------------------- -preferred lifetime must not be greater than valid lifetime - -Add checks for previous constraints in ovn implementation - -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - controller/pinctrl.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) - ---- 
a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -653,6 +653,11 @@ pinctrl_parse_dhcpv6_advt(struct rconn * - case DHCPV6_OPT_IA_PD: { - struct dhcpv6_opt_ia_na *ia_na = (struct dhcpv6_opt_ia_na *)in_opt; - int orig_len = len, hdr_len = 0, size = sizeof *in_opt + 12; -+ uint32_t t1 = ntohl(ia_na->t1), t2 = ntohl(ia_na->t2); -+ -+ if (t1 > t2 && t2 > 0) { -+ goto out; -+ } - - aid = ntohl(ia_na->iaid); - memcpy(&data[len], in_opt, size); -@@ -667,6 +672,15 @@ pinctrl_parse_dhcpv6_advt(struct rconn * - } - - if (ntohs(in_opt->code) == DHCPV6_OPT_IA_PREFIX) { -+ struct dhcpv6_opt_ia_prefix *ia_hdr = -+ (struct dhcpv6_opt_ia_prefix *)in_opt; -+ uint32_t plife_time = ntohl(ia_hdr->plife_time); -+ uint32_t vlife_time = ntohl(ia_hdr->vlife_time); -+ -+ if (plife_time > vlife_time) { -+ goto out; -+ } -+ - memcpy(&data[len], in_opt, flen); - hdr_len += flen; - len += flen; -@@ -831,9 +845,12 @@ pinctrl_parse_dhcpv6_reply(struct dp_pac - struct dhcpv6_opt_ia_prefix *ia_hdr = - (struct dhcpv6_opt_ia_prefix *)(in_dhcpv6_data + size); - -- prefix_len = ia_hdr->plen; - plife_time = ntohl(ia_hdr->plife_time); - vlife_time = ntohl(ia_hdr->vlife_time); -+ if (plife_time > vlife_time) { -+ break; -+ } -+ prefix_len = ia_hdr->plen; - memcpy(&ipv6, &ia_hdr->ipv6, sizeof (struct in6_addr)); - status = true; - } diff --git a/SOURCES/0001-Introduce-DHCPDECLINE-msg-support-to-OVN-DHCP-server.patch b/SOURCES/0001-Introduce-DHCPDECLINE-msg-support-to-OVN-DHCP-server.patch new file mode 100644 index 0000000..8607655 --- /dev/null +++ b/SOURCES/0001-Introduce-DHCPDECLINE-msg-support-to-OVN-DHCP-server.patch @@ -0,0 +1,74 @@ +From 9ad199184959deac21b3dbf0efbbb3d23f6baed1 Mon Sep 17 00:00:00 2001 +Message-Id: <9ad199184959deac21b3dbf0efbbb3d23f6baed1.1599568829.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Wed, 26 Aug 2020 12:15:46 +0200 +Subject: [PATCH] Introduce DHCPDECLINE msg support to OVN DHCP server + +According to the RFC2131 
(https://tools.ietf.org/html/rfc2131), if the +server server receives a DHCPDECLINE message, the client has discovered +through some other means that the suggested network address is already +in use. The server SHOULD notify the local system administrator of a +possible configuration problem. + +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/pinctrl.c | 6 ++++++ + lib/ovn-l7.h | 1 + + tests/ovn.at | 13 +++++++++++++ + 3 files changed, 20 insertions(+) + +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -1889,6 +1889,12 @@ pinctrl_handle_put_dhcp_opts( + + break; + } ++ case OVN_DHCP_MSG_DECLINE: ++ if (request_ip == *offer_ip) { ++ VLOG_INFO("DHCPDECLINE from "ETH_ADDR_FMT ", "IP_FMT" duplicated", ++ ETH_ADDR_ARGS(in_flow->dl_src), IP_ARGS(*offer_ip)); ++ } ++ goto exit; + default: { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "Invalid DHCP message type: %d", *in_dhcp_msg_type); +--- a/lib/ovn-l7.h ++++ b/lib/ovn-l7.h +@@ -180,6 +180,7 @@ struct dhcp_opt6_header { + }; + + /* These are not defined in ovs/lib/dhcp.h, hence defining here. 
*/ ++#define OVN_DHCP_MSG_DECLINE 4 + #define OVN_DHCP_MSG_RELEASE 7 + #define OVN_DHCP_MSG_INFORM 8 + +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -5332,6 +5332,10 @@ test_dhcp() { + reply_dst_ip=${offer_ip} + fi + ++ if test "$dhcp_type" == "04"; then ++ ciaddr=$offer_ip ++ fi ++ + local request=ffffffffffff${src_mac}08004510${ip_len}0000000080110000${src_ip}${dst_ip} + # udp header and dhcp header + request=${request}00440043${udp_len}0000 +@@ -5897,6 +5901,15 @@ AT_CHECK([cat 2.packets | cut -c -48], [ + cat 2.expected | cut -c 53- > expout + AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) + ++# test DHCPDECLINE ++offer_ip=`ip_to_hex 10 0 0 4` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=`ip_to_hex 0 0 0 0` ++request_ip=0 ++expected_dhcp_opts="" ++test_dhcp 1 f00000000001 04 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 02 $expected_dhcp_opts ++AT_CHECK([fgrep -iq 'DHCPDECLINE from f0:00:00:00:00:01, 10.0.0.4 duplicated' hv1/ovn-controller.log], [0], []) ++ + OVN_CLEANUP([hv1]) + + AT_CLEANUP diff --git a/SOURCES/0001-Make-the-notify-calls-work-with-IPv6-in-the-OCF-reso.patch b/SOURCES/0001-Make-the-notify-calls-work-with-IPv6-in-the-OCF-reso.patch deleted file mode 100644 index 262e59b..0000000 --- a/SOURCES/0001-Make-the-notify-calls-work-with-IPv6-in-the-OCF-reso.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 5d47f08c60600708aae354b021da9779a9c8e5e6 Mon Sep 17 00:00:00 2001 -From: Michele Baldessari -Date: Wed, 3 Jun 2020 14:43:47 +0200 -Subject: [PATCH] Make the notify() calls work with IPv6 in the OCF - resource-agent - -When the VIP is an IPv6 address we get the following error in the -resource agent: -ovndb_servers_notify_0:355:stderr [ + ovn-sbctl -- --id=@conn_uuid create Connection 'target=ptcp\:6642\:[fd00:fd00:fd00:2000::a2]' inactivity_probe=180000 -- set SB_Global . 
connections=@conn_uuid ] -ovndb_servers_notify_0:355:stderr [ ovn-sbctl: ptcp\:6642\:[fd00:fd00:fd00:2000::a2]: unexpected "[" parsing string ] - -This is because MASTER_IP is an IPv6 address and is being passed to -ovn-[ns]bctl without being escaped and the command errors out with -unexpected parsing string errors. The rest of the create Connection -command was already escaping the columns, we are just missing the ip -address bits in case of IPv6. - -Let's make sure we escape the '[]:' characters and avoid this problem. -Tested this on an OpenStack environment on both IPv6 and IPv4. - -Signed-off-by: Michele Baldessari -Signed-off-by: Numan Siddique ---- - utilities/ovndb-servers.ocf | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/utilities/ovndb-servers.ocf b/utilities/ovndb-servers.ocf -index 56c2bc322..7351c7d64 100755 ---- a/utilities/ovndb-servers.ocf -+++ b/utilities/ovndb-servers.ocf -@@ -249,7 +249,9 @@ ovsdb_server_notify() { - if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then - LISTEN_ON_IP="0.0.0.0" - else -- LISTEN_ON_IP=${MASTER_IP} -+ # ovn-[sn]bctl want ':[]' characters to be escaped. We do so in -+ # order to make this work when MASTER_IP is an IPv6 address. -+ LISTEN_ON_IP=$(sed -e 's/\(\[\|\]\|:\)/\\\1/g' <<< ${MASTER_IP}) - fi - conn=`ovn-nbctl get NB_global . connections` - if [ "$conn" == "[]" ] --- -2.26.2 - diff --git a/SOURCES/0001-Rely-on-unique-name-for-ovn-qos-meters.patch b/SOURCES/0001-Rely-on-unique-name-for-ovn-qos-meters.patch deleted file mode 100644 index 04bf3fd..0000000 --- a/SOURCES/0001-Rely-on-unique-name-for-ovn-qos-meters.patch +++ /dev/null @@ -1,73 +0,0 @@ -From ab7a370f24ec88a019f1aa4da76f1a050bf398c6 Mon Sep 17 00:00:00 2001 -Message-Id: -From: Lorenzo Bianconi -Date: Mon, 27 Apr 2020 17:45:20 +0200 -Subject: [PATCH] Rely on unique name for ovn qos meters - -ovn currently identifies qos meters according to the rate and burst values -configured. 
Doing so 2 meters on the same hv assigned to 2 different logical -switch ports and configured with the same values for rate and burst will be -mapped to the same ovs kernel mater and will share the bandwidth. -Fix this behavior making qos meter name unique - -Tested-By: Maciej Jozefczyk -Acked-by: Han Zhou -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - controller/ofctrl.c | 2 +- - lib/actions.c | 11 ++++++----- - tests/ovn.at | 10 ++++++++++ - 3 files changed, 17 insertions(+), 6 deletions(-) - ---- a/controller/ofctrl.c -+++ b/controller/ofctrl.c -@@ -970,7 +970,7 @@ add_meter_string(struct ovn_extend_table - enum ofputil_protocol usable_protocols; - char *meter_string = xasprintf("meter=%"PRIu32",%s", - m_desired->table_id, -- &m_desired->name[9]); -+ &m_desired->name[52]); - char *error = parse_ofp_meter_mod_str(&mm, meter_string, OFPMC13_ADD, - &usable_protocols); - if (!error) { ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -2796,12 +2796,13 @@ encode_SET_METER(const struct ovnact_set - * describes the meter itself. 
*/ - char *name; - if (cl->burst) { -- name = xasprintf("__string: kbps burst stats bands=type=drop " -- "rate=%"PRId64" burst_size=%"PRId64"", cl->rate, -- cl->burst); -+ name = xasprintf("__string: uuid "UUID_FMT" kbps burst stats " -+ "bands=type=drop rate=%"PRId64" burst_size=%"PRId64, -+ UUID_ARGS(&ep->lflow_uuid), cl->rate, cl->burst); - } else { -- name = xasprintf("__string: kbps stats bands=type=drop " -- "rate=%"PRId64"", cl->rate); -+ name = xasprintf("__string: uuid "UUID_FMT" kbps stats " -+ "bands=type=drop rate=%"PRId64, -+ UUID_ARGS(&ep->lflow_uuid), cl->rate); - } - - table_id = ovn_extend_table_assign_id(ep->meter_table, name, ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -7653,6 +7653,16 @@ AT_CHECK([as hv ovs-ofctl dump-flows br- - AT_CHECK([as hv ovs-ofctl dump-meters br-int -O OpenFlow13 | grep rate=11123 | wc -l], [0], [0 - ]) - -+# Check multiple qos meters -+ovn-nbctl qos-del lsw0 -+ovn-nbctl qos-add lsw0 to-lport 1001 'inport=="lp1" && is_chassis_resident("lp1")' rate=100000 burst=100000 -+ovn-nbctl qos-add lsw0 to-lport 1001 'inport=="lp2" && is_chassis_resident("lp2")' rate=100000 burst=100000 -+ovn-nbctl qos-add lsw0 to-lport 1002 'inport=="lp1" && is_chassis_resident("lp1")' rate=100001 burst=100001 -+ovn-nbctl qos-add lsw0 to-lport 1002 'inport=="lp2" && is_chassis_resident("lp2")' rate=100001 burst=100001 -+ -+AT_CHECK([as hv ovs-ofctl dump-meters br-int -O OpenFlow13 | grep meter | wc -l], [0], [4 -+]) -+ - OVN_CLEANUP([hv]) - AT_CLEANUP - diff --git a/SOURCES/0001-Split-SB-Port_Group-per-datapath.patch b/SOURCES/0001-Split-SB-Port_Group-per-datapath.patch deleted file mode 100644 index 695a0fe..0000000 --- a/SOURCES/0001-Split-SB-Port_Group-per-datapath.patch +++ /dev/null @@ -1,556 +0,0 @@ -From 3cdc2f9cdd9b4911a236c731dfa76535e3af38e6 Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Mon, 29 Jun 2020 17:24:41 +0200 -Subject: [PATCH] Split SB Port_Group per datapath. 
- -In order to avoid ovn-controller reinstalling all logical flows that -refer a port_group when some ports are added/removed from the port group -we now change the way ovn-northd populates the Southbound DB Port_Group -table. - -Instead of copying NB.Port_Group.name to SB.Port_Group.name we now -create one SB.Port_Group record for every datapath that has ports -referenced by the NB.Port_Group.ports field. In order to maintain the -SB.Port_Group.name uniqueness constraint, ovn-northd populates the field -with the value: _. - -In specific scenarios we see significant improvements in time to -install/remove all logical flows to/from OVS. One such scenario, in the -BZ referenced below has: - -$ ovn-nbctl acl-list pg - from-lport 1001 (inport == @pg && ip) drop - to-lport 1001 (outport == @pg && ip) drop - -Then, incrementally, creates new logical ports on different logical -switches, binds them to OVS interfaces and adds them to the port_group. - -Measuring the total time to perform the above steps 500 times (for 500 -new ports attached to 100 switches, 5 per switch) on a test setup -we observe an improvement of 50% in time it takes to install all -openflow rules when port_groups are split in the SB database. 
- -Suggested-by: Numan Siddique -Reported-by: Venkata Anil -Reported-at: https://bugzilla.redhat.com/1818128 -Signed-off-by: Dumitru Ceara -Acked-by: Numan Siddique -Signed-off-by: Mark Michelson - -Conflicts: - TODO.rst - lib/ovn-util.h - -Change-Id: Ibb8ffc5dbf4deb33a46e94b3f7b57c248d669073 ---- - TODO.rst | 8 ++++++ - controller/lflow.c | 4 ++- - include/ovn/expr.h | 4 ++- - lib/actions.c | 2 +- - lib/expr.c | 48 ++++++++++++++++++++++++------- - lib/ovn-util.h | 7 +++++ - northd/ovn-northd.c | 79 ++++++++++++++++++++++++++++++++++----------------- - tests/ovn-northd.at | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++ - tests/test-ovn.c | 10 +++---- - utilities/ovn-trace.c | 3 +- - 10 files changed, 198 insertions(+), 46 deletions(-) - -diff --git a/TODO.rst b/TODO.rst -index 809d1c9..cfd33be 100644 ---- a/TODO.rst -+++ b/TODO.rst -@@ -149,3 +149,11 @@ OVN To-do List - * OVN Interconnection - - * Packaging for RHEL, Debian, etc. -+ -+* ovn-controller: Remove backwards compatibility for Southbound DB Port_Group -+ names in expr.c a few releases after the 20.09 version. Right now -+ ovn-controller maintains backwards compatibility when connecting to a -+ SB database that doesn't store Port_Group.name as -+ . This causes an additional -+ hashtable lookup in parse_port_group() which can be avoided when we are sure -+ that the Southbound DB uses the new format. 
-diff --git a/controller/lflow.c b/controller/lflow.c -index 01214a3..0e57327 100644 ---- a/controller/lflow.c -+++ b/controller/lflow.c -@@ -552,7 +552,9 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow, - struct sset port_groups_ref = SSET_INITIALIZER(&port_groups_ref); - expr = expr_parse_string(lflow->match, &symtab, l_ctx_in->addr_sets, - l_ctx_in->port_groups, -- &addr_sets_ref, &port_groups_ref, &error); -+ &addr_sets_ref, &port_groups_ref, -+ lflow->logical_datapath->tunnel_key, -+ &error); - const char *addr_set_name; - SSET_FOR_EACH (addr_set_name, &addr_sets_ref) { - lflow_resource_add(l_ctx_out->lfrr, REF_TYPE_ADDRSET, addr_set_name, -diff --git a/include/ovn/expr.h b/include/ovn/expr.h -index 21bf51c..9838251 100644 ---- a/include/ovn/expr.h -+++ b/include/ovn/expr.h -@@ -391,12 +391,14 @@ struct expr *expr_parse(struct lexer *, const struct shash *symtab, - const struct shash *addr_sets, - const struct shash *port_groups, - struct sset *addr_sets_ref, -- struct sset *port_groups_ref); -+ struct sset *port_groups_ref, -+ int64_t dp_id); - struct expr *expr_parse_string(const char *, const struct shash *symtab, - const struct shash *addr_sets, - const struct shash *port_groups, - struct sset *addr_sets_ref, - struct sset *port_groups_ref, -+ int64_t dp_id, - char **errorp); - - struct expr *expr_clone(struct expr *); -diff --git a/lib/actions.c b/lib/actions.c -index 3181126..d107871 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -242,7 +242,7 @@ add_prerequisite(struct action_context *ctx, const char *prerequisite) - char *error; - - expr = expr_parse_string(prerequisite, ctx->pp->symtab, NULL, NULL, -- NULL, NULL, &error); -+ NULL, NULL, 0, &error); - ovs_assert(!error); - ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, expr); - } -diff --git a/lib/expr.c b/lib/expr.c -index 078d178..497b2ac 100644 ---- a/lib/expr.c -+++ b/lib/expr.c -@@ -29,6 +29,7 @@ - #include "simap.h" - #include "sset.h" - #include "util.h" -+#include 
"ovn-util.h" - - VLOG_DEFINE_THIS_MODULE(expr); - -@@ -482,6 +483,10 @@ struct expr_context { - const struct shash *port_groups; /* Port group table. */ - struct sset *addr_sets_ref; /* The set of address set referenced. */ - struct sset *port_groups_ref; /* The set of port groups referenced. */ -+ int64_t dp_id; /* The tunnel_key of the datapath for -+ which we're parsing the current -+ expression. */ -+ - bool not; /* True inside odd number of NOT operators. */ - unsigned int paren_depth; /* Depth of nested parentheses. */ - }; -@@ -783,14 +788,32 @@ static bool - parse_port_group(struct expr_context *ctx, struct expr_constant_set *cs, - size_t *allocated_values) - { -+ struct ds sb_name = DS_EMPTY_INITIALIZER; -+ -+ get_sb_port_group_name(ctx->lexer->token.s, ctx->dp_id, &sb_name); - if (ctx->port_groups_ref) { -- sset_add(ctx->port_groups_ref, ctx->lexer->token.s); -+ sset_add(ctx->port_groups_ref, ds_cstr(&sb_name)); -+ } -+ -+ struct expr_constant_set *port_group = NULL; -+ -+ if (ctx->port_groups) { -+ port_group = shash_find_data(ctx->port_groups, ds_cstr(&sb_name)); -+ if (!port_group) { -+ /* For backwards compatibility (e.g., ovn-controller was -+ * upgraded but ovn-northd not yet), perform an additional -+ * lookup because the NB Port_Group.name might have been -+ * stored as is in the SB Port_Group.name field. -+ */ -+ port_group = shash_find_data(ctx->port_groups, -+ ctx->lexer->token.s); -+ if (ctx->port_groups_ref) { -+ sset_add(ctx->port_groups_ref, ctx->lexer->token.s); -+ } -+ } - } -+ ds_destroy(&sb_name); - -- struct expr_constant_set *port_group -- = (ctx->port_groups -- ? 
shash_find_data(ctx->port_groups, ctx->lexer->token.s) -- : NULL); - if (!port_group) { - lexer_syntax_error(ctx->lexer, "expecting port group name"); - return false; -@@ -1302,14 +1325,16 @@ expr_parse(struct lexer *lexer, const struct shash *symtab, - const struct shash *addr_sets, - const struct shash *port_groups, - struct sset *addr_sets_ref, -- struct sset *port_groups_ref) -+ struct sset *port_groups_ref, -+ int64_t dp_id) - { - struct expr_context ctx = { .lexer = lexer, - .symtab = symtab, - .addr_sets = addr_sets, - .port_groups = port_groups, - .addr_sets_ref = addr_sets_ref, -- .port_groups_ref = port_groups_ref }; -+ .port_groups_ref = port_groups_ref, -+ .dp_id = dp_id }; - return lexer->error ? NULL : expr_parse__(&ctx); - } - -@@ -1325,6 +1350,7 @@ expr_parse_string(const char *s, const struct shash *symtab, - const struct shash *port_groups, - struct sset *addr_sets_ref, - struct sset *port_groups_ref, -+ int64_t dp_id, - char **errorp) - { - struct lexer lexer; -@@ -1332,7 +1358,7 @@ expr_parse_string(const char *s, const struct shash *symtab, - lexer_init(&lexer, s); - lexer_get(&lexer); - struct expr *expr = expr_parse(&lexer, symtab, addr_sets, port_groups, -- addr_sets_ref, port_groups_ref); -+ addr_sets_ref, port_groups_ref, dp_id); - lexer_force_end(&lexer); - *errorp = lexer_steal_error(&lexer); - if (*errorp) { -@@ -1558,7 +1584,7 @@ expr_get_level(const struct expr *expr) - static enum expr_level - expr_parse_level(const char *s, const struct shash *symtab, char **errorp) - { -- struct expr *expr = expr_parse_string(s, symtab, NULL, NULL, NULL, NULL, -+ struct expr *expr = expr_parse_string(s, symtab, NULL, NULL, NULL, NULL, 0, - errorp); - enum expr_level level = expr ? 
expr_get_level(expr) : EXPR_L_NOMINAL; - expr_destroy(expr); -@@ -1730,7 +1756,7 @@ parse_and_annotate(const char *s, const struct shash *symtab, - char *error; - struct expr *expr; - -- expr = expr_parse_string(s, symtab, NULL, NULL, NULL, NULL, &error); -+ expr = expr_parse_string(s, symtab, NULL, NULL, NULL, NULL, 0, &error); - if (expr) { - expr = expr_annotate_(expr, symtab, nesting, &error); - } -@@ -3456,7 +3482,7 @@ expr_parse_microflow(const char *s, const struct shash *symtab, - lexer_get(&lexer); - - struct expr *e = expr_parse(&lexer, symtab, addr_sets, port_groups, -- NULL, NULL); -+ NULL, NULL, 0); - lexer_force_end(&lexer); - - if (e) { -diff --git a/lib/ovn-util.h b/lib/ovn-util.h -index ec5f2cf..9c6d357 100644 ---- a/lib/ovn-util.h -+++ b/lib/ovn-util.h -@@ -111,6 +111,13 @@ bool ovn_tnlid_in_use(const struct hmap *set, uint32_t tnlid); - uint32_t ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min, - uint32_t max, uint32_t *hint); - -+static inline void -+get_sb_port_group_name(const char *nb_pg_name, int64_t dp_tunnel_key, -+ struct ds *sb_pg_name) -+{ -+ ds_put_format(sb_pg_name, "%"PRId64"_%s", dp_tunnel_key, nb_pg_name); -+} -+ - char *ovn_chassis_redirect_name(const char *port_name); - void ovn_set_pidfile(const char *name); - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index fc25031..8a809d0 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -4457,7 +4457,11 @@ build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip, - struct ovn_port_group_ls { - struct hmap_node key_node; /* Index on 'key'. */ - struct uuid key; /* nb_ls->header_.uuid. */ -- const struct nbrec_logical_switch *nb_ls; -+ struct ovn_datapath *od; -+ -+ struct ovn_port **ports; /* Ports in 'od' referrenced by the PG. 
*/ -+ size_t n_ports; -+ size_t n_allocated_ports; - }; - - struct ovn_port_group { -@@ -4467,14 +4471,14 @@ struct ovn_port_group { - struct hmap nb_lswitches; /* NB lswitches related to the port group */ - }; - --static void --ovn_port_group_ls_add(struct ovn_port_group *pg, -- const struct nbrec_logical_switch *nb_ls) -+static struct ovn_port_group_ls * -+ovn_port_group_ls_add(struct ovn_port_group *pg, struct ovn_datapath *od) - { - struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls); -- pg_ls->key = nb_ls->header_.uuid; -- pg_ls->nb_ls = nb_ls; -+ pg_ls->key = od->nbs->header_.uuid; -+ pg_ls->od = od; - hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key)); -+ return pg_ls; - } - - static struct ovn_port_group_ls * -@@ -4491,6 +4495,18 @@ ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid) - return NULL; - } - -+static void -+ovn_port_group_ls_add_port(struct ovn_port_group_ls *pg_ls, -+ struct ovn_port *op) -+{ -+ if (pg_ls->n_ports == pg_ls->n_allocated_ports) { -+ pg_ls->ports = x2nrealloc(pg_ls->ports, -+ &pg_ls->n_allocated_ports, -+ sizeof *pg_ls->ports); -+ } -+ pg_ls->ports[pg_ls->n_ports++] = op; -+} -+ - struct ovn_ls_port_group { - struct hmap_node key_node; /* Index on 'key'. */ - struct uuid key; /* nb_pg->header_.uuid. 
*/ -@@ -5250,6 +5266,7 @@ ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg) - hmap_remove(pgs, &pg->key_node); - struct ovn_port_group_ls *ls; - HMAP_FOR_EACH_POP (ls, key_node, &pg->nb_lswitches) { -+ free(ls->ports); - free(ls); - } - hmap_destroy(&pg->nb_lswitches); -@@ -5287,9 +5304,10 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs, - struct ovn_port_group_ls *pg_ls = - ovn_port_group_ls_find(pg, &op->od->nbs->header_.uuid); - if (!pg_ls) { -- ovn_port_group_ls_add(pg, op->od->nbs); -+ pg_ls = ovn_port_group_ls_add(pg, op->od); - ovn_ls_port_group_add(&op->od->nb_pgs, nb_pg); - } -+ ovn_port_group_ls_add_port(pg_ls, op); - } - } - } -@@ -10454,7 +10472,7 @@ sync_address_sets(struct northd_context *ctx) - * contains lport uuids, while in OVN_Southbound we store the lport names. - */ - static void --sync_port_groups(struct northd_context *ctx) -+sync_port_groups(struct northd_context *ctx, struct hmap *pgs) - { - struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups); - -@@ -10463,26 +10481,35 @@ sync_port_groups(struct northd_context *ctx) - shash_add(&sb_port_groups, sb_port_group->name, sb_port_group); - } - -- const struct nbrec_port_group *nb_port_group; -- NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) { -- sb_port_group = shash_find_and_delete(&sb_port_groups, -- nb_port_group->name); -- if (!sb_port_group) { -- sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn); -- sbrec_port_group_set_name(sb_port_group, nb_port_group->name); -- } -+ struct ds sb_name = DS_EMPTY_INITIALIZER; - -- const char **nb_port_names = xcalloc(nb_port_group->n_ports, -- sizeof *nb_port_names); -- int i; -- for (i = 0; i < nb_port_group->n_ports; i++) { -- nb_port_names[i] = nb_port_group->ports[i]->name; -+ struct ovn_port_group *pg; -+ HMAP_FOR_EACH (pg, key_node, pgs) { -+ -+ struct ovn_port_group_ls *pg_ls; -+ HMAP_FOR_EACH (pg_ls, key_node, &pg->nb_lswitches) { -+ ds_clear(&sb_name); -+ 
get_sb_port_group_name(pg->nb_pg->name, pg_ls->od->sb->tunnel_key, -+ &sb_name); -+ sb_port_group = shash_find_and_delete(&sb_port_groups, -+ ds_cstr(&sb_name)); -+ if (!sb_port_group) { -+ sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn); -+ sbrec_port_group_set_name(sb_port_group, ds_cstr(&sb_name)); -+ } -+ -+ const char **nb_port_names = xcalloc(pg_ls->n_ports, -+ sizeof *nb_port_names); -+ for (size_t i = 0; i < pg_ls->n_ports; i++) { -+ nb_port_names[i] = pg_ls->ports[i]->nbsp->name; -+ } -+ sbrec_port_group_set_ports(sb_port_group, -+ nb_port_names, -+ pg_ls->n_ports); -+ free(nb_port_names); - } -- sbrec_port_group_set_ports(sb_port_group, -- nb_port_names, -- nb_port_group->n_ports); -- free(nb_port_names); - } -+ ds_destroy(&sb_name); - - struct shash_node *node, *next; - SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) { -@@ -11081,7 +11108,7 @@ ovnnb_db_run(struct northd_context *ctx, - ovn_update_ipv6_prefix(ports); - - sync_address_sets(ctx); -- sync_port_groups(ctx); -+ sync_port_groups(ctx, &port_groups); - sync_meters(ctx); - sync_dns_entries(ctx, datapaths); - destroy_ovn_lbs(&lbs); -diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index e6a8c04..37805d3 100644 ---- a/tests/ovn-northd.at -+++ b/tests/ovn-northd.at -@@ -1406,3 +1406,82 @@ AT_CHECK([ovn-nbctl --wait=sb sync], [0]) - AT_CHECK([test 0 = $(ovn-sbctl list Ha_Chassis_Group | wc -l)]) - - AT_CLEANUP -+ -+AT_SETUP([ovn -- check NB/SB Port_Group translation (lsp add/del)]) -+ovn_start -+ -+ovn-nbctl ls-add ls1 -+ovn-nbctl ls-add ls2 -+ovn-nbctl lsp-add ls1 lsp1 -+ovn-nbctl lsp-add ls2 lsp2 -+ovn-nbct --wait=sb sync -+ls1_key=$(ovn-sbctl --columns tunnel_key --bare list Datapath_Binding ls1) -+ls2_key=$(ovn-sbctl --columns tunnel_key --bare list Datapath_Binding ls2) -+ -+# Add an empty port group. This should generate no entry in the SB. 
-+ovn-nbctl --wait=sb pg-add pg_test -+AT_CHECK([test 0 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+ -+# Add lsp1 to the port group. This should generate an entry in the SB only -+# for ls1. -+ovn-nbctl --wait=sb pg-set-ports pg_test lsp1 -+AT_CHECK([test 1 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls1_key}_pg_test], [0], [dnl -+lsp1 -+]) -+ -+# Add lsp2 to the port group. This should generate a new entry in the SB, for -+# ls2. -+ovn-nbctl --wait=sb pg-set-ports pg_test lsp1 lsp2 -+AT_CHECK([test 2 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls1_key}_pg_test], [0], [dnl -+lsp1 -+]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls2_key}_pg_test], [0], [dnl -+lsp2 -+]) -+ -+# Remove lsp1 from the port group. The SB Port_Group for ls1 should be -+# removed. -+ovn-nbctl --wait=sb pg-set-ports pg_test lsp2 -+AT_CHECK([test 1 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls2_key}_pg_test], [0], [dnl -+lsp2 -+]) -+ -+# Remove lsp2 from the port group. All SB Port_Groups should be purged. -+ovn-nbctl --wait=sb clear Port_Group pg_test ports -+AT_CHECK([test 0 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+ -+AT_CLEANUP -+ -+AT_SETUP([ovn -- check NB/SB Port_Group translation (ls del)]) -+ovn_start -+ -+ovn-nbctl ls-add ls1 -+ovn-nbctl ls-add ls2 -+ovn-nbctl lsp-add ls1 lsp1 -+ovn-nbctl lsp-add ls2 lsp2 -+ovn-nbct --wait=sb sync -+ls1_key=$(ovn-sbctl --columns tunnel_key --bare list Datapath_Binding ls1) -+ls2_key=$(ovn-sbctl --columns tunnel_key --bare list Datapath_Binding ls2) -+ -+# Add lsp1 & lsp2 to a port group. This should generate two entries in the -+# SB (one per logical switch). 
-+ovn-nbctl --wait=sb pg-add pg_test lsp1 lsp2 -+AT_CHECK([test 2 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls1_key}_pg_test], [0], [dnl -+lsp1 -+]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls2_key}_pg_test], [0], [dnl -+lsp2 -+]) -+ -+# Delete logical switch ls1. This should remove the associated SB Port_Group. -+ovn-nbctl --wait=sb ls-del ls1 -+AT_CHECK([test 1 = $(ovn-sbctl --columns _uuid list Port_Group | grep uuid -c)]) -+AT_CHECK([ovn-sbctl --columns ports --bare find Port_Group name=${ls2_key}_pg_test], [0], [dnl -+lsp2 -+]) -+ -+AT_CLEANUP -diff --git a/tests/test-ovn.c b/tests/test-ovn.c -index 11697eb..9f74c5c 100644 ---- a/tests/test-ovn.c -+++ b/tests/test-ovn.c -@@ -235,8 +235,8 @@ create_port_groups(struct shash *port_groups) - }; - static const char *const pg2[] = { NULL }; - -- expr_const_sets_add(port_groups, "pg1", pg1, 3, false); -- expr_const_sets_add(port_groups, "pg_empty", pg2, 0, false); -+ expr_const_sets_add(port_groups, "0_pg1", pg1, 3, false); -+ expr_const_sets_add(port_groups, "0_pg_empty", pg2, 0, false); - } - - static bool -@@ -302,7 +302,7 @@ test_parse_expr__(int steps) - char *error; - - expr = expr_parse_string(ds_cstr(&input), &symtab, &addr_sets, -- &port_groups, NULL, NULL, &error); -+ &port_groups, NULL, NULL, 0, &error); - if (!error && steps > 0) { - expr = expr_annotate(expr, &symtab, &error); - } -@@ -428,7 +428,7 @@ test_evaluate_expr(struct ovs_cmdl_context *ctx) - struct expr *expr; - - expr = expr_parse_string(ds_cstr(&input), &symtab, NULL, NULL, -- NULL, NULL, &error); -+ NULL, NULL, 0, &error); - if (!error) { - expr = expr_annotate(expr, &symtab, &error); - } -@@ -903,7 +903,7 @@ test_tree_shape_exhaustively(struct expr *expr, struct shash *symtab, - - char *error; - modified = expr_parse_string(ds_cstr(&s), symtab, NULL, -- NULL, NULL, NULL, &error); -+ NULL, NULL, NULL, 0, &error); - if (error) 
{ - fprintf(stderr, "%s fails to parse (%s)\n", - ds_cstr(&s), error); -diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index d7251e7..2666c10 100644 ---- a/utilities/ovn-trace.c -+++ b/utilities/ovn-trace.c -@@ -889,7 +889,8 @@ read_flows(void) - char *error; - struct expr *match; - match = expr_parse_string(sblf->match, &symtab, &address_sets, -- &port_groups, NULL, NULL, &error); -+ &port_groups, NULL, NULL, dp->tunnel_key, -+ &error); - if (error) { - VLOG_WARN("%s: parsing expression failed (%s)", - sblf->match, error); --- -1.8.3.1 - diff --git a/SOURCES/0001-chassis-Fix-the-way-encaps-are-updated-for-a-chassis.patch b/SOURCES/0001-chassis-Fix-the-way-encaps-are-updated-for-a-chassis.patch new file mode 100644 index 0000000..ee4a429 --- /dev/null +++ b/SOURCES/0001-chassis-Fix-the-way-encaps-are-updated-for-a-chassis.patch @@ -0,0 +1,175 @@ +From 536d6aa32497ab17e12767446f294fc8467cfc7c Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Thu, 3 Sep 2020 17:03:45 +0200 +Subject: [PATCH 1/2] chassis: Fix the way encaps are updated for a chassis + record. + +ovn-controller always stores the last configured system-id/chassis-id in +memory regardless if the connection to the SB is up or down. This is OK +as long as the change can be committed successfully when the SB DB +connection comes back up. + +Without this change, if the chassis-id changes while the SB connection is +down, ovn-controller will fail to create the new record but nevertheless +update its in-memory chassis-id. When the SB connection is restored +ovn-controller tries to find the record corresponding to the chassis-id +it stored in memory. This fails causing ovn-controller to try to insert +a new record. But at this point a constraint violation is hit in the SB +because the Encap records of the "stale" chassis still exist in the DB, +along with the old chassis record. + +This commit changes the way we search for a "stale" chassis record in the +SB to cover the above mentioned case. 
Also, in such cases there's no need +to recreate the Encaps, it's sufficient to update the chassis_name field. + +Fixes: 5344f24ecb1a ("ovn-controller: Refactor chassis.c to abstract the string parsing") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from master commit 94a32fca2d2b825fece0ef5b1873459bd9857dd3) + +(cherry picked from upstream commit 0716c4f4cf2d97f03c1f2e5099fece92f3183d43) + +Change-Id: I20f0c61fddffba599b16f43d15b110414a6a6e6b +--- + controller/chassis.c | 60 +++++++++++++++++++++++++++++++------------------ + tests/ovn-controller.at | 17 ++++++++++++++ + 2 files changed, 55 insertions(+), 22 deletions(-) + +diff --git a/controller/chassis.c b/controller/chassis.c +index 6ac591e..773d966 100644 +--- a/controller/chassis.c ++++ b/controller/chassis.c +@@ -397,10 +397,7 @@ chassis_tunnels_changed(const struct sset *encap_type_set, + { + size_t encap_type_count = 0; + +- for (int i = 0; i < chassis_rec->n_encaps; i++) { +- if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) { +- return true; +- } ++ for (size_t i = 0; i < chassis_rec->n_encaps; i++) { + + if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) { + return true; +@@ -473,6 +470,19 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn, + } + + /* ++ * Updates encaps for a given chassis. This can happen when the chassis ++ * name has changed. Also, the only thing we support updating is the ++ * chassis_name. For other changes the encaps will be recreated. ++ */ ++static void ++chassis_update_encaps(const struct sbrec_chassis *chassis) ++{ ++ for (size_t i = 0; i < chassis->n_encaps; i++) { ++ sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name); ++ } ++} ++ ++/* + * Returns a pointer to a chassis record from 'chassis_table' that + * matches at least one tunnel config. 
+ */ +@@ -503,9 +513,10 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table, + /* If this is a chassis config update after we initialized the record once + * then we should always be able to find it with the ID we saved in + * chassis_state. +- * Otherwise (i.e., first time we create the record) then we check if there's +- * a stale record from a previous controller run that didn't end gracefully +- * and reuse it. If not then we create a new record. ++ * Otherwise (i.e., first time we create the record or if the system-id ++ * changed) then we check if there's a stale record from a previous ++ * controller run that didn't end gracefully and reuse it. If not then we ++ * create a new record. + * + * Sets '*chassis_rec' to point to the local chassis record. + * Returns true if this record was already in the database, false if it was +@@ -519,28 +530,32 @@ chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn, + const char *chassis_id, + const struct sbrec_chassis **chassis_rec) + { ++ const struct sbrec_chassis *chassis = NULL; ++ + if (chassis_info_id_inited(&chassis_state)) { +- *chassis_rec = chassis_lookup_by_name(sbrec_chassis_by_name, +- chassis_info_id(&chassis_state)); +- if (!(*chassis_rec)) { +- VLOG_DBG("Could not find Chassis, will create it" +- ": stored (%s) ovs (%s)", ++ chassis = chassis_lookup_by_name(sbrec_chassis_by_name, ++ chassis_info_id(&chassis_state)); ++ if (!chassis) { ++ VLOG_DBG("Could not find Chassis, will check if the id changed: " ++ "stored (%s) ovs (%s)", + chassis_info_id(&chassis_state), chassis_id); +- if (ovnsb_idl_txn) { +- /* Recreate the chassis record. 
*/ +- *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn); +- return false; +- } + } +- } else { +- *chassis_rec = +- chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id); ++ } + +- if (!(*chassis_rec) && ovnsb_idl_txn) { ++ if (!chassis) { ++ chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id); ++ } ++ ++ if (!chassis) { ++ /* Recreate the chassis record. */ ++ VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id); ++ if (ovnsb_idl_txn) { + *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn); +- return false; + } ++ return false; + } ++ ++ *chassis_rec = chassis; + return true; + } + +@@ -602,6 +617,7 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum, + chassis_rec); + if (!tunnels_changed) { ++ chassis_update_encaps(chassis_rec); + return updated; + } + +diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at +index 1b96934..f2faf1f 100644 +--- a/tests/ovn-controller.at ++++ b/tests/ovn-controller.at +@@ -196,6 +196,23 @@ OVS_WAIT_UNTIL([ + test "${sysid}" = "${chassis_id}" + ]) + ++# Simulate system-id changing while ovn-controller is disconnected from the ++# SB. ++valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote) ++invalid_remote=tcp:0.0.0.0:4242 ++ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote} ++expected_state="not connected" ++OVS_WAIT_UNTIL([ ++ test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)" ++]) ++sysid=${sysid}-bar ++ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}" ++ovs-vsctl set Open_vSwitch . 
external_ids:ovn-remote=${valid_remote} ++OVS_WAIT_UNTIL([ ++ chassis_id=$(ovn-sbctl get Chassis "${sysid}" name) ++ test "${sysid}" = "${chassis_id}" ++]) ++ + # Gracefully terminate daemons + OVN_CLEANUP_SBOX([hv]) + OVN_CLEANUP_VSWITCH([main]) +-- +1.8.3.1 + diff --git a/SOURCES/0001-controller-Use-OpenFlow-version-1.5.patch b/SOURCES/0001-controller-Use-OpenFlow-version-1.5.patch deleted file mode 100644 index b979eea..0000000 --- a/SOURCES/0001-controller-Use-OpenFlow-version-1.5.patch +++ /dev/null @@ -1,259 +0,0 @@ -From a8fcc8cc07ff9acbf9ff328e6ac2e781d73d3f8b Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Tue, 21 Apr 2020 19:28:23 +0530 -Subject: [PATCH 1/4] controller: Use OpenFlow version 1.5 - -When adding flows to the group table, we need to use OFP15_VERSION to -set the selection_method. Right now ovn-controller is setting -select_method=dp_hash for OVN load balancers, but when encoding the -group mod, it is ignored. - -Acked-by: Han Zhou -Signed-off-by: Numan Siddique ---- - NEWS | 1 + - controller/ofctrl.c | 14 +++++++------- - controller/ovn-controller.c | 2 +- - controller/pinctrl.c | 2 +- - lib/actions.c | 10 +++++----- - lib/expr.c | 2 +- - tests/ovn.at | 6 +++--- - utilities/ovn-sbctl.c | 4 ++-- - utilities/ovn-trace.c | 4 ++-- - 9 files changed, 23 insertions(+), 22 deletions(-) - -diff --git a/NEWS b/NEWS -index 21c80f0dc..e77343c89 100644 ---- a/NEWS -+++ b/NEWS -@@ -8,6 +8,7 @@ OVN v20.03.0 - 28 Feb 2020 - - Added support for MLD Snooping and MLD Querier. - - Added support for ECMP routes in OVN router. - - Added IPv6 Prefix Delegation support in OVN. -+ - OVN now uses OpenFlow 1.5. 
- - - OVN Interconnection: - * Support for L3 interconnection of multiple OVN deployments with tunnels -diff --git a/controller/ofctrl.c b/controller/ofctrl.c -index 485a857d1..4b51cd86e 100644 ---- a/controller/ofctrl.c -+++ b/controller/ofctrl.c -@@ -178,7 +178,7 @@ ofctrl_init(struct ovn_extend_table *group_table, - int inactivity_probe_interval) - { - swconn = rconn_create(inactivity_probe_interval, 0, -- DSCP_DEFAULT, 1 << OFP13_VERSION); -+ DSCP_DEFAULT, 1 << OFP15_VERSION); - tx_counter = rconn_packet_counter_create(); - hmap_init(&installed_flows); - ovs_list_init(&flow_updates); -@@ -282,8 +282,8 @@ process_tlv_table_reply(const struct ofputil_tlv_table_reply *reply) - ovs_list_init(&ttm.mappings); - ovs_list_push_back(&ttm.mappings, &tm.list_node); - -- xid = queue_msg(ofputil_encode_tlv_table_mod(OFP13_VERSION, &ttm)); -- xid2 = queue_msg(ofputil_encode_barrier_request(OFP13_VERSION)); -+ xid = queue_msg(ofputil_encode_tlv_table_mod(OFP15_VERSION, &ttm)); -+ xid2 = queue_msg(ofputil_encode_barrier_request(OFP15_VERSION)); - state = S_TLV_TABLE_MOD_SENT; - - return true; -@@ -911,7 +911,7 @@ encode_flow_mod(struct ofputil_flow_mod *fm) - fm->buffer_id = UINT32_MAX; - fm->out_port = OFPP_ANY; - fm->out_group = OFPG_ANY; -- return ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM); -+ return ofputil_encode_flow_mod(fm, OFPUTIL_P_OF15_OXM); - } - - static void -@@ -926,7 +926,7 @@ add_flow_mod(struct ofputil_flow_mod *fm, struct ovs_list *msgs) - static struct ofpbuf * - encode_group_mod(const struct ofputil_group_mod *gm) - { -- return ofputil_encode_group_mod(OFP13_VERSION, gm, NULL, -1); -+ return ofputil_encode_group_mod(OFP15_VERSION, gm, NULL, -1); - } - - static void -@@ -940,7 +940,7 @@ add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs) - static struct ofpbuf * - encode_meter_mod(const struct ofputil_meter_mod *mm) - { -- return ofputil_encode_meter_mod(OFP13_VERSION, mm); -+ return ofputil_encode_meter_mod(OFP15_VERSION, mm); - } 
- - static void -@@ -1281,7 +1281,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, - - if (!ovs_list_is_empty(&msgs)) { - /* Add a barrier to the list of messages. */ -- struct ofpbuf *barrier = ofputil_encode_barrier_request(OFP13_VERSION); -+ struct ofpbuf *barrier = ofputil_encode_barrier_request(OFP15_VERSION); - const struct ofp_header *oh = barrier->data; - ovs_be32 xid_ = oh->xid; - ovs_list_push_back(&msgs, &barrier->list_node); -diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c -index 6ff897325..a2d92429c 100644 ---- a/controller/ovn-controller.c -+++ b/controller/ovn-controller.c -@@ -2297,7 +2297,7 @@ parse_options(int argc, char *argv[]) - usage(); - - case 'V': -- ovs_print_version(OFP13_VERSION, OFP13_VERSION); -+ ovs_print_version(OFP15_VERSION, OFP15_VERSION); - exit(EXIT_SUCCESS); - - VLOG_OPTION_HANDLERS -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 9d5b7c3c0..6b0ac3483 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -2805,7 +2805,7 @@ pinctrl_handler(void *arg_) - static long long int svc_monitors_next_run_time = LLONG_MAX; - static long long int send_prefixd_time = LLONG_MAX; - -- swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION); -+ swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION); - - while (!latch_is_set(&pctrl->pinctrl_thread_exit)) { - if (pctrl->br_int_name) { -diff --git a/lib/actions.c b/lib/actions.c -index 2dba9a922..605dbffe4 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -1457,7 +1457,7 @@ encode_nested_actions(const struct ovnact_nest *on, - size_t oc_offset = encode_start_controller_op(opcode, false, - NX_CTLR_NO_METER, ofpacts); - ofpacts_put_openflow_actions(inner_ofpacts.data, inner_ofpacts.size, -- ofpacts, OFP13_VERSION); -+ ofpacts, OFP15_VERSION); - encode_finish_controller_op(oc_offset, ofpacts); - - /* Free memory. 
*/ -@@ -2260,7 +2260,7 @@ encode_PUT_DHCPV4_OPTS(const struct ovnact_put_opts *pdo, - size_t oc_offset = encode_start_controller_op(ACTION_OPCODE_PUT_DHCP_OPTS, - true, NX_CTLR_NO_METER, - ofpacts); -- nx_put_header(ofpacts, dst.field->id, OFP13_VERSION, false); -+ nx_put_header(ofpacts, dst.field->id, OFP15_VERSION, false); - ovs_be32 ofs = htonl(dst.ofs); - ofpbuf_put(ofpacts, &ofs, sizeof ofs); - -@@ -2291,7 +2291,7 @@ encode_PUT_DHCPV6_OPTS(const struct ovnact_put_opts *pdo, - - size_t oc_offset = encode_start_controller_op( - ACTION_OPCODE_PUT_DHCPV6_OPTS, true, NX_CTLR_NO_METER, ofpacts); -- nx_put_header(ofpacts, dst.field->id, OFP13_VERSION, false); -+ nx_put_header(ofpacts, dst.field->id, OFP15_VERSION, false); - ovs_be32 ofs = htonl(dst.ofs); - ofpbuf_put(ofpacts, &ofs, sizeof ofs); - -@@ -2401,7 +2401,7 @@ encode_DNS_LOOKUP(const struct ovnact_dns_lookup *dl, - size_t oc_offset = encode_start_controller_op(ACTION_OPCODE_DNS_LOOKUP, - true, NX_CTLR_NO_METER, - ofpacts); -- nx_put_header(ofpacts, dst.field->id, OFP13_VERSION, false); -+ nx_put_header(ofpacts, dst.field->id, OFP15_VERSION, false); - ovs_be32 ofs = htonl(dst.ofs); - ofpbuf_put(ofpacts, &ofs, sizeof ofs); - encode_finish_controller_op(oc_offset, ofpacts); -@@ -2565,7 +2565,7 @@ encode_PUT_ND_RA_OPTS(const struct ovnact_put_opts *po, - - size_t oc_offset = encode_start_controller_op( - ACTION_OPCODE_PUT_ND_RA_OPTS, true, NX_CTLR_NO_METER, ofpacts); -- nx_put_header(ofpacts, dst.field->id, OFP13_VERSION, false); -+ nx_put_header(ofpacts, dst.field->id, OFP15_VERSION, false); - ovs_be32 ofs = htonl(dst.ofs); - ofpbuf_put(ofpacts, &ofs, sizeof ofs); - -diff --git a/lib/expr.c b/lib/expr.c -index 78646a1af..078d17840 100644 ---- a/lib/expr.c -+++ b/lib/expr.c -@@ -1414,7 +1414,7 @@ expr_symbol_format(const struct expr_symbol *symbol, struct ds *s) - } else if (symbol->ovn_field) { - ds_put_cstr(s, symbol->name); - } else { -- nx_format_field_name(symbol->field->id, OFP13_VERSION, s); -+ 
nx_format_field_name(symbol->field->id, OFP15_VERSION, s); - } - } - -diff --git a/tests/ovn.at b/tests/ovn.at -index 35415f2b6..5fb100ad4 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -1186,7 +1186,7 @@ reg1[0] = put_dhcp_opts(offerip=1.2.3.4, domain_name=1.2.3.4); - - # nd_ns - nd_ns { nd.target = xxreg0; output; }; -- encodes as controller(userdata=00.00.00.09.00.00.00.00.ff.ff.00.18.00.00.23.20.00.06.00.80.00.00.00.00.00.01.de.10.00.01.2e.10.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00) -+ encodes as controller(userdata=00.00.00.09.00.00.00.00.00.1c.00.18.00.80.00.00.00.00.00.00.00.01.de.10.80.00.3e.10.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00) - has prereqs ip6 - - nd_ns { }; -@@ -1197,12 +1197,12 @@ nd_ns { }; - # nd_na - nd_na { eth.src = 12:34:56:78:9a:bc; nd.tll = 12:34:56:78:9a:bc; outport = inport; inport = ""; /* Allow sending out inport. */ output; }; - formats as nd_na { eth.src = 12:34:56:78:9a:bc; nd.tll = 12:34:56:78:9a:bc; outport = inport; inport = ""; output; }; -- encodes as controller(userdata=00.00.00.03.00.00.00.00.00.19.00.10.80.00.08.06.12.34.56.78.9a.bc.00.00.00.19.00.10.80.00.42.06.12.34.56.78.9a.bc.00.00.ff.ff.00.18.00.00.23.20.00.06.00.20.00.00.00.00.00.01.1c.04.00.01.1e.04.00.19.00.10.00.01.1c.04.00.00.00.00.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00) -+ encodes as controller(userdata=00.00.00.03.00.00.00.00.00.19.00.10.80.00.08.06.12.34.56.78.9a.bc.00.00.00.19.00.10.80.00.42.06.12.34.56.78.9a.bc.00.00.00.1c.00.18.00.20.00.00.00.00.00.00.00.01.1c.04.00.01.1e.04.00.00.00.00.00.19.00.10.00.01.1c.04.00.00.00.00.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00) - has prereqs nd_ns - # nd_na_router - nd_na_router { eth.src = 12:34:56:78:9a:bc; nd.tll = 12:34:56:78:9a:bc; outport = inport; inport = ""; /* Allow sending out inport. 
*/ output; }; - formats as nd_na_router { eth.src = 12:34:56:78:9a:bc; nd.tll = 12:34:56:78:9a:bc; outport = inport; inport = ""; output; }; -- encodes as controller(userdata=00.00.00.0c.00.00.00.00.00.19.00.10.80.00.08.06.12.34.56.78.9a.bc.00.00.00.19.00.10.80.00.42.06.12.34.56.78.9a.bc.00.00.ff.ff.00.18.00.00.23.20.00.06.00.20.00.00.00.00.00.01.1c.04.00.01.1e.04.00.19.00.10.00.01.1c.04.00.00.00.00.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00) -+ encodes as controller(userdata=00.00.00.0c.00.00.00.00.00.19.00.10.80.00.08.06.12.34.56.78.9a.bc.00.00.00.19.00.10.80.00.42.06.12.34.56.78.9a.bc.00.00.00.1c.00.18.00.20.00.00.00.00.00.00.00.01.1c.04.00.01.1e.04.00.00.00.00.00.19.00.10.00.01.1c.04.00.00.00.00.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00) - has prereqs nd_ns - - # get_nd -diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c -index d8bb3dcbc..04e082c70 100644 ---- a/utilities/ovn-sbctl.c -+++ b/utilities/ovn-sbctl.c -@@ -795,7 +795,7 @@ sbctl_open_vconn(struct shash *options) - - char *remote = ovs->data ? 
xstrdup(ovs->data) : default_ovs(); - struct vconn *vconn; -- int retval = vconn_open_block(remote, 1 << OFP13_VERSION, 0, -1, &vconn); -+ int retval = vconn_open_block(remote, 1 << OFP15_VERSION, 0, -1, &vconn); - if (retval) { - VLOG_WARN("%s: connection failed (%s)", remote, ovs_strerror(retval)); - } -@@ -816,7 +816,7 @@ sbctl_dump_openflow(struct vconn *vconn, const struct uuid *uuid, bool stats) - - struct ofputil_flow_stats *fses; - size_t n_fses; -- int error = vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF13_OXM, -+ int error = vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF15_OXM, - &fses, &n_fses); - if (error) { - VLOG_WARN("%s: error obtaining flow stats (%s)", -diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index c9d72285c..d7251e7ed 100644 ---- a/utilities/ovn-trace.c -+++ b/utilities/ovn-trace.c -@@ -2326,7 +2326,7 @@ trace_openflow(const struct ovntrace_flow *f, struct ovs_list *super) - - struct ofputil_flow_stats *fses; - size_t n_fses; -- int error = vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF13_OXM, -+ int error = vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF15_OXM, - &fses, &n_fses); - if (error) { - ovntrace_node_append(super, OVNTRACE_NODE_ERROR, -@@ -2435,7 +2435,7 @@ trace(const char *dp_s, const char *flow_s) - ds_put_char(&output, '\n'); - - if (ovs) { -- int retval = vconn_open_block(ovs, 1 << OFP13_VERSION, 0, -1, &vconn); -+ int retval = vconn_open_block(ovs, 1 << OFP15_VERSION, 0, -1, &vconn); - if (retval) { - VLOG_WARN("%s: connection failed (%s)", ovs, ovs_strerror(retval)); - } --- -2.26.2 - diff --git a/SOURCES/0001-controller-use-LLA-IPv6-address-as-NS-source-address.patch b/SOURCES/0001-controller-use-LLA-IPv6-address-as-NS-source-address.patch deleted file mode 100644 index 091ee2d..0000000 --- a/SOURCES/0001-controller-use-LLA-IPv6-address-as-NS-source-address.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 7f60417ae6c7438565a21d5aee0bb8ae0b3a9b68 Mon Sep 17 00:00:00 2001 -Message-Id: 
<7f60417ae6c7438565a21d5aee0bb8ae0b3a9b68.1585835882.git.me@lorenzobianconi.net> -From: Lorenzo Bianconi -Date: Tue, 24 Mar 2020 20:33:27 +0100 -Subject: [PATCH] controller: use LLA IPv6 address as NS source address - -Use router LLA IPv6 address as IPv6 source address for Neighbor -Solicitation packets - -Fixes: c0bf32d72 ("Manage ARP process locally in a DVR scenario") -Change-Id: Iafa26f4b3c20e181bd5b54a357d468ce61b589b6 -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique -Signed-off-by: Lorenzo Bianconi ---- - controller/pinctrl.c | 4 +++- - tests/ovn.at | 15 +++++++++------ - 2 files changed, 12 insertions(+), 7 deletions(-) - ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -4563,9 +4563,11 @@ pinctrl_handle_nd_ns(struct rconn *swcon - - uint64_t packet_stub[128 / 8]; - struct dp_packet packet; -+ struct in6_addr ipv6_src; - dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); - -- compose_nd_ns(&packet, ip_flow->dl_src, &ip_flow->ipv6_src, -+ in6_generate_lla(ip_flow->dl_src, &ipv6_src); -+ compose_nd_ns(&packet, ip_flow->dl_src, &ipv6_src, - &ip_flow->ipv6_dst); - - /* Reload previous packet metadata and set actions from userdata. 
*/ ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -11280,13 +11280,13 @@ options:rxq_pcap=${pcap_file}-rx.pcap - # This function sends ipv6 packet - test_ipv6() { - local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5 -- local dst_mcast_mac=$6 mcast_node_ip=$7 nd_target=$8 -+ local dst_mcast_mac=$6 mcast_node_ip=$7 nd_target=$8 nd_src_ip=$9 - - local packet=${dst_mac}${src_mac}86dd6000000000083aff${src_ip}${dst_ip} - packet=${packet}8000000000000000 - - src_mac=000002010204 -- expected_packet=${dst_mcast_mac}${src_mac}86dd6000000000203aff${src_ip} -+ expected_packet=${dst_mcast_mac}${src_mac}86dd6000000000203aff${nd_src_ip} - expected_packet=${expected_packet}${mcast_node_ip}8700XXXX00000000 - expected_packet=${expected_packet}${nd_target}0101${src_mac} - -@@ -11298,6 +11298,7 @@ test_ipv6() { - src_mac=506400000002 - dst_mac=00000000af01 - src_ip=aef0000000000000526400fffe000002 -+nd_src_ip=fe80000000000000020002fffe010204 - dst_ip=20010db800010000020002fffe010205 - dst_mcast_mac=3333ff010205 - mcast_node_ip=ff0200000000000000000001ff010205 -@@ -11305,7 +11306,7 @@ nd_target=20010db800010000020002fffe0102 - # Send an IPv6 packet. Generated IPv6 Neighbor solicitation packet - # should be received by the ports attached to br-phys. 
- test_ipv6 1 $src_mac $dst_mac $src_ip $dst_ip $dst_mcast_mac \ --$mcast_node_ip $nd_target -+$mcast_node_ip $nd_target $nd_src_ip - - OVS_WAIT_WHILE([test 24 = $(wc -c hv1/br-phys_n1-tx.pcap | cut -d " " -f1)]) - OVS_WAIT_WHILE([test 24 = $(wc -c hv1/br-phys-tx.pcap | cut -d " " -f1)]) -@@ -11338,7 +11339,7 @@ dst_mcast_mac=3333ff011305 - mcast_node_ip=ff0200000000000000000001ff011305 - nd_target=20010db800010000020002fffe011305 - test_ipv6 1 $src_mac $dst_mac $src_ip $dst_ip $dst_mcast_mac \ --$mcast_node_ip $nd_target -+$mcast_node_ip $nd_target $nd_src_ip - - OVS_WAIT_WHILE([test 24 = $(wc -c hv1/br-phys_n1-tx.pcap | cut -d " " -f1)]) - OVS_WAIT_WHILE([test 24 = $(wc -c hv1/br-phys-tx.pcap | cut -d " " -f1)]) -@@ -14262,7 +14263,7 @@ send_na() { - get_nd() { - local eth_src=$1 src_ip=$2 dst_ip=$3 ta=$4 - local ip6_hdr=6000000000203aff${src_ip}${dst_ip} -- request=3333ff000010${eth_src}86dd${ip6_hdr}8700357600000000${ta}0101${eth_src} -+ request=3333ff000010${eth_src}86dd${ip6_hdr}870051f400000000${ta}0101${eth_src} - - echo $request - } -@@ -14325,6 +14326,8 @@ router_mac1=000002010203 - router_ip=$(ip_to_hex 172 16 1 1) - router_ip6=20020000000000000000000000000001 - -+nd_src_ip6=fe80000000000000020002fffe010203 -+ - dst_mac=001122334455 - dst_ip=$(ip_to_hex 172 16 1 10) - dst_ip6=20020000000000000000000000000010 -@@ -14342,7 +14345,7 @@ nd_ip=ff0200000000000000000001ff000010 - ip6_hdr=6000000000083afe${src_ip6}${dst_ip6} - - send_icmp6_packet 1 1 $src_mac $router_mac0 $src_ip6 $dst_ip6 --echo $(get_nd $router_mac1 $src_ip6 $nd_ip $dst_ip6) >> expected -+echo $(get_nd $router_mac1 $nd_src_ip6 $nd_ip $dst_ip6) >> expected - echo "${dst_mac}${router_mac1}86dd${ip6_hdr}8000dcb662f00001" >> expected - send_na 2 1 $dst_mac $router_mac1 $dst_ip6 $router_ip6 - diff --git a/SOURCES/0001-lex-Allow-unmasked-bits-in-value-mask-tokens.patch b/SOURCES/0001-lex-Allow-unmasked-bits-in-value-mask-tokens.patch new file mode 100644 index 0000000..e1d5078 --- /dev/null +++ 
b/SOURCES/0001-lex-Allow-unmasked-bits-in-value-mask-tokens.patch @@ -0,0 +1,95 @@ +From 714a097ba82ad53b90cfff921ea3749cd1130f3e Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Tue, 23 Jun 2020 10:17:50 +0200 +Subject: [PATCH] lex: Allow unmasked bits in value/mask tokens. + +It's quite restrictive to not accept ACLs/policies that match on a CIDR +that has non-zero host bits. Right now this generates a lexer error that +can only be detected in the logs. + +There's no real harm in automatically zero-ing the unmasked bits. + +Reported-at: https://bugzilla.redhat.com/1812820 +Reported-by: Ying Xu +Signed-off-by: Dumitru Ceara +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique +(cherry picked from upstream commit 2104f67aacd62f62a31f4e23a6720aeeaa751154) + +Change-Id: I90c57fe51170d63fcd08d1a57d6d9555755a43be +--- + lib/lex.c | 10 ++-------- + tests/ovn.at | 11 +++++++---- + 2 files changed, 9 insertions(+), 12 deletions(-) + +diff --git a/lib/lex.c b/lib/lex.c +index 94f6c77..4d92199 100644 +--- a/lib/lex.c ++++ b/lib/lex.c +@@ -485,16 +485,10 @@ lex_parse_mask(const char *p, struct lex_token *token) + return p; + } + +- /* Check invariant that a 1-bit in the value corresponds to a 1-bit in the ++ /* Apply invariant that a 1-bit in the value corresponds to a 1-bit in the + * mask. */ + for (int i = 0; i < ARRAY_SIZE(token->mask.be32); i++) { +- ovs_be32 v = token->value.be32[i]; +- ovs_be32 m = token->mask.be32[i]; +- +- if (v & ~m) { +- lex_error(token, "Value contains unmasked 1-bits."); +- break; +- } ++ token->value.be32[i] &= token->mask.be32[i]; + } + + /* Done! 
*/ +diff --git a/tests/ovn.at b/tests/ovn.at +index cf521af..e7e0439 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -79,7 +79,7 @@ a/b => a error("`/' is only valid as part of `//' or `/*'.") b + + 0/0 + 0/1 +-1/0 => error("Value contains unmasked 1-bits.") ++1/0 => 0/0 + 1/1 + 128/384 + 1/3 +@@ -99,7 +99,7 @@ a/b => a error("`/' is only valid as part of `//' or `/*'.") b + 0X => error("Hex digits expected following 0X.") + 0x0/0x0 => 0/0 + 0x0/0x1 => 0/0x1 +-0x1/0x0 => error("Value contains unmasked 1-bits.") ++0x1/0x0 => 0/0 + 0xffff/0x1ffff + 0x. => error("Invalid syntax in hexadecimal constant.") + +@@ -109,9 +109,12 @@ a/b => a error("`/' is only valid as part of `//' or `/*'.") b + 192.168.0.0/255.255.0.0 => 192.168.0.0/16 + 192.168.0.0/255.255.255.0 => 192.168.0.0/24 + 192.168.0.0/255.255.0.255 +-192.168.0.0/255.0.0.0 => error("Value contains unmasked 1-bits.") ++192.168.0.0/255.0.0.0 => 192.0.0.0/8 + 192.168.0.0/32 + 192.168.0.0/255.255.255.255 => 192.168.0.0/32 ++192.168.0.2/32 ++192.168.0.2/30 => 192.168.0.0/30 ++192.168.0.2/24 => 192.168.0.0/24 + 1.2.3.4:5 => 1.2.3.4 : 5 + + :: +@@ -135,7 +138,7 @@ FE:DC:ba:98:76:54 => fe:dc:ba:98:76:54 + 01:00:00:00:00:00/01:00:00:00:00:00 + ff:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff + fe:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff +-ff:ff:ff:ff:ff:ff/fe:ff:ff:ff:ff:ff => error("Value contains unmasked 1-bits.") ++ff:ff:ff:ff:ff:ff/fe:ff:ff:ff:ff:ff => fe:ff:ff:ff:ff:ff/fe:ff:ff:ff:ff:ff + fe:x => error("Invalid numeric constant.") + 00:01:02:03:04:x => error("Invalid numeric constant.") + +-- +1.8.3.1 + diff --git a/SOURCES/0001-northd-By-pass-IPv6-Router-Adv-and-Router-Solicitati.patch b/SOURCES/0001-northd-By-pass-IPv6-Router-Adv-and-Router-Solicitati.patch deleted file mode 100644 index 2352aab..0000000 --- a/SOURCES/0001-northd-By-pass-IPv6-Router-Adv-and-Router-Solicitati.patch +++ /dev/null @@ -1,65 +0,0 @@ -From d64f501d787571a50eb2e5380947d1d0a3e2ca74 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Thu, 11 Jun 2020 
18:44:41 +0530 -Subject: [PATCH] northd: By pass IPv6 Router Adv and Router Solicitation - packets from ACL stages. - -We already add below logical flows to by pass IPv6 Neighbor discovery packets -from in/out ACL stage. - -table=6 (ls_in_acl ), priority=65535, match=(nd), action=(next;) -table=4 (ls_out_acl ), priority=65535, match=(nd), action=(next;) - -This patch also adds nd_rs and nd_ra to these logical flows. Without these -the IPv6 Router Adv packets generated by ovn-controller are dropped if -CMS has configured ACLs. - -Reported-by: Jakub Libosvar -Signed-off-by: Numan Siddique -Acked-by: Mark Michelson - -(cherry-picked from upstream master commit 90e5971018277ab0f383a56f59ffcfe17466a2c6) - -Change-Id: I33fcb3032fe946f2b2333a8cf2791af75dceaf44 ---- - northd/ovn-northd.8.xml | 6 ++++++ - northd/ovn-northd.c | 6 ++++-- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index dc56de273..081536ab4 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -439,6 +439,12 @@ - ACL re-allow this connection. - - -+
  • -+ A priority-65535 flow that allows IPv6 Neighbor solicitation, -+ Neighbor discover, Router solicitation and Router advertisement -+ packets. -+
  • -+ -
  • - A priority 34000 logical flow is added for each logical switch datapath - with the match eth.dst = E to allow the service -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index cffe3de17..fc250318f 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -5390,8 +5390,10 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, - /* Ingress and Egress ACL Table (Priority 65535). - * - * Not to do conntrack on ND packets. */ -- ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;"); -- ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;"); -+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, -+ "nd || nd_ra || nd_rs", "next;"); -+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, -+ "nd || nd_ra || nd_rs", "next;"); - } - - /* Ingress or Egress ACL Table (Various priorities). */ --- -2.26.2 - diff --git a/SOURCES/0001-northd-do-not-insert-identical-lflows-in-S_ROUTER_IN.patch b/SOURCES/0001-northd-do-not-insert-identical-lflows-in-S_ROUTER_IN.patch deleted file mode 100644 index d000815..0000000 --- a/SOURCES/0001-northd-do-not-insert-identical-lflows-in-S_ROUTER_IN.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 5f3e15c3d5809134d70892b4f65031e5bd110c8f Mon Sep 17 00:00:00 2001 -From: Lorenzo Bianconi -Date: Wed, 11 Mar 2020 17:41:59 +0100 -Subject: [PATCH 1/2] northd: do not insert identical lflows in - S_ROUTER_IN_ARP_RESOLVE - -Avoid to configure multiple identical logical flows in -S_ROUTER_IN_ARP_RESOLVE stage. 
This can happen adding L2 destination -address info about snat since multiple nat entries will use the same -external_ip - -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique - -(cherry picked from upstream OVS branch20.03 commit 20aa8c3c5a1930805a32ec8121affa07b2ac7dff) - -Change-Id: Ic5c1df529363469092a55454fdfbcae31a06ccf5 ---- - northd/ovn-northd.c | 36 ++++++++++++++++++++++-------------- - 1 file changed, 22 insertions(+), 14 deletions(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 787ca2f80..cdaeff401 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -8630,6 +8630,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - continue; - } - -+ struct sset nat_entries = SSET_INITIALIZER(&nat_entries); -+ - struct v46_ip snat_ip, lb_snat_ip; - const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat", - &snat_ip); -@@ -8855,20 +8857,24 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - &nat->header_); - } - -- ds_clear(&match); -- ds_put_format( -- &match, "outport == %s && %s == %s", -- od->l3dgw_port->json_key, -- is_v6 ? "xxreg0" : "reg0", nat->external_ip); -- ds_clear(&actions); -- ds_put_format( -- &actions, "eth.dst = %s; next;", -- distributed ? nat->external_mac : -- od->l3dgw_port->lrp_networks.ea_s); -- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_RESOLVE, -- 100, ds_cstr(&match), -- ds_cstr(&actions), -- &nat->header_); -+ if (!sset_contains(&nat_entries, nat->external_ip)) { -+ ds_clear(&match); -+ ds_put_format( -+ &match, "outport == %s && %s == %s", -+ od->l3dgw_port->json_key, -+ is_v6 ? "xxreg0" : "reg0", nat->external_ip); -+ ds_clear(&actions); -+ ds_put_format( -+ &actions, "eth.dst = %s; next;", -+ distributed ? 
nat->external_mac : -+ od->l3dgw_port->lrp_networks.ea_s); -+ ovn_lflow_add_with_hint(lflows, od, -+ S_ROUTER_IN_ARP_RESOLVE, -+ 100, ds_cstr(&match), -+ ds_cstr(&actions), -+ &nat->header_); -+ sset_add(&nat_entries, nat->external_ip); -+ } - } - - /* Egress UNDNAT table: It is for already established connections' -@@ -9049,6 +9055,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - } - } - -+ sset_destroy(&nat_entries); -+ - /* Handle force SNAT options set in the gateway router. */ - if (dnat_force_snat_ip && !od->l3dgw_port) { - /* If a packet with destination IP address as that of the --- -2.25.1 - diff --git a/SOURCES/0001-northd-fix-empty_lb_backends-controller_event-for-IP.patch b/SOURCES/0001-northd-fix-empty_lb_backends-controller_event-for-IP.patch new file mode 100644 index 0000000..51d2497 --- /dev/null +++ b/SOURCES/0001-northd-fix-empty_lb_backends-controller_event-for-IP.patch @@ -0,0 +1,115 @@ +From 940fff5c75ccc1ef0d66f37ce167f89f8ae6d098 Mon Sep 17 00:00:00 2001 +Message-Id: <940fff5c75ccc1ef0d66f37ce167f89f8ae6d098.1599499391.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Fri, 4 Sep 2020 15:44:49 +0200 +Subject: [PATCH] northd: fix empty_lb_backends controller_event for IPv6 + +Introduce missing square brackets defining IPv6 empty_lb_backends +controller_event logical flows in order to properly extract vip ip +and port from the related string + +Fixes: bb9f2b9ce56c ("ovn-northd: Consider load balancer active backends in router pipeline") +Fixes: 821e1e54abcb ("OVN: use trigger_event action to report 'empty_lb_rule' events") +Acked-by: Dumitru Ceara +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + northd/ovn-northd.c | 7 ++++--- + tests/ovn.at | 30 +++++++++++++++++++++++++++--- + 2 files changed, 31 insertions(+), 6 deletions(-) + +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -4995,6 +4995,7 @@ build_empty_lb_event_flow(struct ovn_dat + return; + } + ++ bool ipv4 = 
(lb_vip->addr_family == AF_INET); + struct ds match = DS_EMPTY_INITIALIZER; + char *meter = "", *action; + +@@ -5003,14 +5004,14 @@ build_empty_lb_event_flow(struct ovn_dat + } + + ds_put_format(&match, "ip%s.dst == %s && %s", +- lb_vip->addr_family == AF_INET ? "4": "6", +- lb_vip->vip, lb->protocol); ++ ipv4 ? "4": "6", lb_vip->vip, lb->protocol); + + char *vip = lb_vip->vip; + if (lb_vip->vip_port) { + ds_put_format(&match, " && %s.dst == %u", lb->protocol, + lb_vip->vip_port); +- vip = xasprintf("%s:%u", lb_vip->vip, lb_vip->vip_port); ++ vip = xasprintf("%s%s%s:%u", ipv4 ? "" : "[", lb_vip->vip, ++ ipv4 ? "" : "]", lb_vip->vip_port); + } + + action = xasprintf("trigger_event(event = \"%s\", " +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -16404,7 +16404,7 @@ ovn-nbctl create Logical_Router name=lr0 + for i in 0 1; do + idx=$((i+1)) + ovn-nbctl ls-add sw$i +- ovn-nbctl lrp-add lr0 lrp$i 00:00:00:00:ff:0$idx 192.168.$idx.254/24 ++ ovn-nbctl lrp-add lr0 lrp$i 00:00:00:00:ff:0$idx 192.168.$idx.254/24 200$idx::a/64 + ovn-nbctl \ + -- lsp-add sw$i lrp$i-attachment \ + -- set Logical_Switch_Port lrp$i-attachment type=router \ +@@ -16420,7 +16420,7 @@ for i in 1 2; do + + for j in 1 2; do + ovn-nbctl lsp-add sw0 sw0-p$i$j -- \ +- lsp-set-addresses sw0-p$i$j "00:00:00:00:00:$i$j 192.168.1.$i$j" ++ lsp-set-addresses sw0-p$i$j "00:00:00:00:00:$i$j 192.168.1.$i$j 2001::$i$j" + + ovs-vsctl -- add-port br-int vif$i$j -- \ + set interface vif$i$j \ +@@ -16433,7 +16433,7 @@ done + + as hv1 + ovn-nbctl lsp-add sw1 sw1-p0 \ +- -- lsp-set-addresses sw1-p0 "00:00:00:00:00:33 192.168.2.11" ++ -- lsp-set-addresses sw1-p0 "00:00:00:00:00:33 192.168.2.11 2002::1" + ovs-vsctl -- add-port br-int vif33 -- \ + set interface vif33 \ + external-ids:iface-id=sw1-p0 \ +@@ -16449,6 +16449,11 @@ uuid_lb0=$(ovn-nbctl --bare --columns=_u + ovn-nbctl lb-add lb1 192.168.2.100:80 "" + ovn-nbctl lr-lb-add lr0 lb1 + uuid_lb1=$(ovn-nbctl --bare --columns=_uuid find load_balancer name=lb1) ++ ++ovn-nbctl 
lb-add lb2 [[2001::10]]:50051 "" ++ovn-nbctl ls-lb-add sw0 lb2 ++uuid_lb2=$(ovn-nbctl --bare --columns=_uuid find load_balancer name=lb2) ++ + ovn-nbctl --wait=hv meter-add event-elb drop 100 pktps 10 + + OVN_POPULATE_ARP +@@ -16493,6 +16498,25 @@ empty_lb_backends + AT_CHECK([ovn-sbctl get controller_event $uuid event_info:vip], [0], [dnl + "192.168.2.100:80" + ]) ++ovn-sbctl destroy controller_event $uuid ++ ++packet2="inport==\"sw0-p11\" && eth.src==00:00:00:00:00:11 && eth.dst==00:00:00:00:00:21 && ++ ip6 && ip.ttl==64 && ip6.src==2001::11 && ip6.dst==2001::10 && ++ tcp && tcp.src==10000 && tcp.dst==50051" ++ ++as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet2" ++ovn-sbctl list controller_event ++uuid=$(ovn-sbctl list controller_event | awk '/_uuid/{print $3}') ++ ++AT_CHECK([ovn-sbctl get controller_event $uuid event_type], [0], [dnl ++empty_lb_backends ++]) ++AT_CHECK([ovn-sbctl get controller_event $uuid event_info:vip], [0], [dnl ++"[[2001::10]]:50051" ++]) ++AT_CHECK_UNQUOTED([ovn-sbctl get controller_event $uuid event_info:load_balancer], [0], [dnl ++"$uuid_lb2" ++]) + + OVN_CLEANUP([hv1], [hv2]) + AT_CLEANUP diff --git a/SOURCES/0001-ofctrl-Split-large-group_mod-messages-up.patch b/SOURCES/0001-ofctrl-Split-large-group_mod-messages-up.patch deleted file mode 100644 index 6f7f2ec..0000000 --- a/SOURCES/0001-ofctrl-Split-large-group_mod-messages-up.patch +++ /dev/null @@ -1,161 +0,0 @@ -From 88056d15bffe67c033322de16c01a013e7bc7c7c Mon Sep 17 00:00:00 2001 -From: Mark Michelson -Date: Wed, 6 May 2020 09:49:55 -0400 -Subject: [PATCH] ofctrl: Split large group_mod messages up. - -Group mod messages have the possibility of growing very large if OVN -installs a load balancer with a great many backends. The current -approach is to send a single ADD message with the entire group contents. 
-If the size of this message exceeds UINT16_MAX, then OpenFlow cannot -properly express the length of the message since the OpenFlow header's -length is limited to 16 bits. - -This patch solves the problem by breaking the message into pieces. The -first piece is an ADD, and subsequent messages are INSERT_BUCKET -messages. This way, we end up being able to express the entire size of -the group through multiple OpenFlow messages. - -Signed-off-by: Mark Michelson -Acked-by: Numan Siddique ---- - controller/ofctrl.c | 70 ++++++++++++++++++++++++++++++++++++++++++--- - tests/ovn.at | 29 +++++++++++++++++++ - 2 files changed, 95 insertions(+), 4 deletions(-) - -diff --git a/controller/ofctrl.c b/controller/ofctrl.c -index 4b51cd86e..073e076c7 100644 ---- a/controller/ofctrl.c -+++ b/controller/ofctrl.c -@@ -930,10 +930,72 @@ encode_group_mod(const struct ofputil_group_mod *gm) - } - - static void --add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs) -+add_group_mod(struct ofputil_group_mod *gm, struct ovs_list *msgs) - { - struct ofpbuf *msg = encode_group_mod(gm); -- ovs_list_push_back(msgs, &msg->list_node); -+ if (msg->size <= UINT16_MAX) { -+ ovs_list_push_back(msgs, &msg->list_node); -+ return; -+ } -+ /* This group mod request is too large to fit in a single OF message -+ * since the header can only specify a 16-bit size. We need to break -+ * this into multiple group_mod requests. -+ */ -+ -+ /* Pull the first bucket. All buckets are approximately the same length -+ * since they contain near-identical actions. Using its length can give -+ * us a good approximation of how many buckets we can fit in a single -+ * OF message. 
-+ */ -+ ofpraw_pull_assert(msg); -+ struct ofp15_group_mod *ogm = ofpbuf_pull(msg, sizeof(*ogm)); -+ struct ofp15_bucket *of_bucket = ofpbuf_pull(msg, sizeof(*of_bucket)); -+ uint16_t bucket_size = ntohs(of_bucket->len); -+ -+ ofpbuf_delete(msg); -+ -+ /* Dividing by 2 here ensures that just in case there are variations in -+ * the size of the buckets, we will not put too many in our new group_mod -+ * message. -+ */ -+ size_t max_buckets = ((UINT16_MAX - sizeof *ogm) / bucket_size) / 2; -+ -+ ovs_assert(max_buckets < ovs_list_size(&gm->buckets)); -+ -+ uint16_t command = OFPGC15_INSERT_BUCKET; -+ if (gm->command == OFPGC15_DELETE || -+ gm->command == OFPGC15_REMOVE_BUCKET) { -+ command = OFPGC15_REMOVE_BUCKET; -+ } -+ struct ofputil_group_mod split = { -+ .command = command, -+ .type = gm->type, -+ .group_id = gm->group_id, -+ .command_bucket_id = OFPG15_BUCKET_LAST, -+ }; -+ ovs_list_init(&split.buckets); -+ -+ size_t i = 0; -+ struct ofputil_bucket *bucket; -+ LIST_FOR_EACH (bucket, list_node, &gm->buckets) { -+ if (i++ < max_buckets) { -+ continue; -+ } -+ break; -+ } -+ -+ ovs_list_splice(&split.buckets, &bucket->list_node, &gm->buckets); -+ -+ struct ofpbuf *orig = encode_group_mod(gm); -+ ovs_list_push_back(msgs, &orig->list_node); -+ -+ /* We call this recursively just in case our new -+ * INSERT_BUCKET/REMOVE_BUCKET group_mod is still too -+ * large for an OF message. This will allow for it to -+ * be broken into pieces, too. 
-+ */ -+ add_group_mod(&split, msgs); -+ ofputil_uninit_group_mod(&split); - } - - -@@ -1124,7 +1186,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, - char *group_string = xasprintf("group_id=%"PRIu32",%s", - desired->table_id, - desired->name); -- char *error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD, group_string, -+ char *error = parse_ofp_group_mod_str(&gm, OFPGC15_ADD, group_string, - NULL, NULL, &usable_protocols); - if (!error) { - add_group_mod(&gm, &msgs); -@@ -1243,7 +1305,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, - enum ofputil_protocol usable_protocols; - char *group_string = xasprintf("group_id=%"PRIu32"", - installed->table_id); -- char *error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE, -+ char *error = parse_ofp_group_mod_str(&gm, OFPGC15_DELETE, - group_string, NULL, NULL, - &usable_protocols); - if (!error) { -diff --git a/tests/ovn.at b/tests/ovn.at -index 52d994972..f39fda2e4 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -19179,3 +19179,32 @@ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected]) - - OVN_CLEANUP([hv1]) - AT_CLEANUP -+ -+AT_SETUP([ovn -- Big Load Balancer]) -+ovn_start -+ -+ovn-nbctl ls-add ls1 -+ovn-nbctl lsp-add ls1 lsp1 -+ -+net_add n1 -+sim_add hv1 -+ -+as hv1 -+ovs-vsctl add-br br-phys -+ovn_attach n1 br-phys 192.168.0.1 -+ovs-vsctl add-port br-int p1 -- set Interface p1 external-ids:iface-id=lsp1 -+ -+IPS=192.169.0.1:80 -+for i in `seq 1 9` ; do -+ for j in `seq 1 254` ; do -+ IPS=${IPS},192.169.$i.$j:80 -+ done -+done -+ -+ovn-nbctl lb-add lb0 172.172.0.1:8080 "${IPS}" -+ovn-nbctl --wait=hv ls-lb-add ls1 lb0 -+ -+AT_CHECK([test 2287 = `ovs-ofctl dump-group-stats br-int | grep -o bucket | wc -l`]) -+ -+OVN_CLEANUP([hv1]) -+AT_CLEANUP --- -2.25.4 - diff --git a/SOURCES/0001-ovn-controller-Fix-incremental-processing-of-Port_Bi.patch b/SOURCES/0001-ovn-controller-Fix-incremental-processing-of-Port_Bi.patch new file mode 100644 index 0000000..e525ef5 --- /dev/null +++ 
b/SOURCES/0001-ovn-controller-Fix-incremental-processing-of-Port_Bi.patch @@ -0,0 +1,207 @@ +From 499546979fcf98c8423fb18263261005f747b228 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Mon, 31 Aug 2020 14:14:52 +0200 +Subject: [PATCH] ovn-controller: Fix incremental processing of Port_Binding + deletes. + +If a Port_Binding is deleted from the Southbound DB and the +corresponding OVS interface is also deleted from the OVS DB, and +if both updates are received and processed by ovn-controller in +the same iteration, ovn-controller should process port_binding +delete operations first. + +This commit also adds three new unixctl debug commands for +ovn-controller: +- debug/pause: pause ovn-controller processing, except unixctl commands. +- debug/resume: resume ovn-controller processing. +- debug/status: return the status of the ovn-controller processing. + +These new commands are needed by the test for this scenario as without +them we have no way of ensuring predictable results. Users should not +use these commands in production. This is also why the commands are not +documented. 
+ +CC: Numan Siddique +Fixes: 6b0f01116bab ("ovn-controller: Handle runtime data changes in flow output engine") +Reported-by: Tim Rozet +Reported-at: https://bugzilla.redhat.com/1871961 +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit d2de07a627721ef443d8929fe04eb888c5086e98) + +Change-Id: I889235d31cf9b7fd6a9dde6e0903038c87106b13 +--- + controller/ovn-controller.c | 71 ++++++++++++++++++++++++++++++++++++++++++--- + tests/ovn.at | 38 ++++++++++++++++++++++++ + 2 files changed, 105 insertions(+), 4 deletions(-) + +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 933acf6..28ca7a8 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -73,6 +73,9 @@ static unixctl_cb_func extend_table_list; + static unixctl_cb_func inject_pkt; + static unixctl_cb_func engine_recompute_cmd; + static unixctl_cb_func cluster_state_reset_cmd; ++static unixctl_cb_func debug_pause_execution; ++static unixctl_cb_func debug_resume_execution; ++static unixctl_cb_func debug_status_execution; + + #define DEFAULT_BRIDGE_NAME "br-int" + #define DEFAULT_PROBE_INTERVAL_MSEC 5000 +@@ -2253,10 +2256,6 @@ main(int argc, char *argv[]) + + engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL); + engine_add_input(&en_runtime_data, &en_ovs_bridge, NULL); +- engine_add_input(&en_runtime_data, &en_ovs_port, +- engine_noop_handler); +- engine_add_input(&en_runtime_data, &en_ovs_interface, +- runtime_data_ovs_interface_handler); + engine_add_input(&en_runtime_data, &en_ovs_qos, NULL); + + engine_add_input(&en_runtime_data, &en_sb_chassis, NULL); +@@ -2265,6 +2264,15 @@ main(int argc, char *argv[]) + engine_add_input(&en_runtime_data, &en_sb_port_binding, + runtime_data_sb_port_binding_handler); + ++ /* The OVS interface handler for runtime_data changes MUST be executed ++ * after the sb_port_binding_handler as port_binding deletes must be ++ * processed first. 
++ */ ++ engine_add_input(&en_runtime_data, &en_ovs_port, ++ engine_noop_handler); ++ engine_add_input(&en_runtime_data, &en_ovs_interface, ++ runtime_data_ovs_interface_handler); ++ + struct engine_arg engine_arg = { + .sb_idl = ovnsb_idl_loop.idl, + .ovs_idl = ovs_idl_loop.idl, +@@ -2319,6 +2327,14 @@ main(int argc, char *argv[]) + cluster_state_reset_cmd, + &reset_ovnsb_idl_min_index); + ++ bool paused = false; ++ unixctl_command_register("debug/pause", "", 0, 0, debug_pause_execution, ++ &paused); ++ unixctl_command_register("debug/resume", "", 0, 0, debug_resume_execution, ++ &paused); ++ unixctl_command_register("debug/status", "", 0, 0, debug_status_execution, ++ &paused); ++ + unsigned int ovs_cond_seqno = UINT_MAX; + unsigned int ovnsb_cond_seqno = UINT_MAX; + +@@ -2327,6 +2343,15 @@ main(int argc, char *argv[]) + restart = false; + bool sb_monitor_all = false; + while (!exiting) { ++ /* If we're paused just run the unixctl server and skip most of the ++ * processing loop. ++ */ ++ if (paused) { ++ unixctl_server_run(unixctl); ++ unixctl_server_wait(unixctl); ++ goto loop_done; ++ } ++ + engine_init_run(); + + struct ovsdb_idl_txn *ovs_idl_txn = ovsdb_idl_loop_run(&ovs_idl_loop); +@@ -2581,6 +2606,8 @@ main(int argc, char *argv[]) + + ovsdb_idl_track_clear(ovnsb_idl_loop.idl); + ovsdb_idl_track_clear(ovs_idl_loop.idl); ++ ++loop_done: + poll_block(); + if (should_service_stop()) { + exiting = true; +@@ -2834,3 +2861,39 @@ cluster_state_reset_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED, + poll_immediate_wake(); + unixctl_command_reply(conn, NULL); + } ++ ++static void ++debug_pause_execution(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *paused_) ++{ ++ bool *paused = paused_; ++ ++ VLOG_INFO("User triggered execution pause."); ++ *paused = true; ++ unixctl_command_reply(conn, NULL); ++} ++ ++static void ++debug_resume_execution(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, 
void *paused_) ++{ ++ bool *paused = paused_; ++ ++ VLOG_INFO("User triggered execution resume."); ++ *paused = false; ++ poll_immediate_wake(); ++ unixctl_command_reply(conn, NULL); ++} ++ ++static void ++debug_status_execution(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *paused_) ++{ ++ bool *paused = paused_; ++ ++ if (*paused) { ++ unixctl_command_reply(conn, "paused"); ++ } else { ++ unixctl_command_reply(conn, "running"); ++ } ++} +diff --git a/tests/ovn.at b/tests/ovn.at +index 1216bc5..cf521af 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -21361,3 +21361,41 @@ AT_CHECK([ovn-sbctl find mac ip=10.0.0.2 mac='"00:00:00:00:03:02"' logical_port= + OVN_CLEANUP([hv1],[hv2]) + + AT_CLEANUP ++ ++AT_SETUP([ovn -- Delete Port_Binding and OVS port Incremental Processing]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.10 ++ ++ovn-nbctl ls-add ls ++ovn-nbctl lsp-add ls lsp ++ ++as hv1 ovs-vsctl \ ++ -- add-port br-int vif1 \ ++ -- set Interface vif1 external_ids:iface-id=lsp ++ ++# Wait for port to be bound. ++OVS_WAIT_UNTIL([test $(ovn-sbctl --columns _uuid --bare list chassis hv1 | wc -l) -eq 1]) ++ch=$(ovn-sbctl --columns _uuid --bare list chassis hv1) ++OVS_WAIT_UNTIL([test $(ovn-sbctl --columns chassis --bare list port_binding lsp | grep $ch -c) -eq 1]) ++ ++# Pause ovn-controller. ++as hv1 ovn-appctl -t ovn-controller debug/pause ++ ++# Delete port binding and OVS port. The updates will be processed in the same ++# loop in ovn-controller when it resumes. ++ovn-nbctl --wait=sb lsp-del lsp ++as hv1 ovs-vsctl del-port vif1 ++ ++# Resume ovn-controller. ++as hv1 ovn-appctl -t ovn-controller debug/resume ++ ++# Make sure ovn-controller runs fine. 
++OVS_WAIT_UNTIL([test x$(as hv1 ovn-appctl -t ovn-controller debug/status) = "xrunning"]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP +-- +1.8.3.1 + diff --git a/SOURCES/0001-ovn-controller-Fix-potential-segfault-with-virtual-p.patch b/SOURCES/0001-ovn-controller-Fix-potential-segfault-with-virtual-p.patch deleted file mode 100644 index 34b4c29..0000000 --- a/SOURCES/0001-ovn-controller-Fix-potential-segfault-with-virtual-p.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 08dfddbe4b1559dd91747cee435eb8945555b348 Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Tue, 31 Mar 2020 13:47:04 +0200 -Subject: [PATCH] ovn-controller: Fix potential segfault with "virtual" port - bindings. - -Even though ovn-controller tries to set port_binding->chassis to NULL -every time port_binding->virtual_parent is set to NULL for bindings of -type="virtual", there's no way to enforce that an operator doesn't -manually clear the "virtual_parent" column in the Southbound database. - -In such scenario ovn-controller would crash because of trying to -dereference the NULL port_binding->virtual_parent column. - -Add an extra check and release "virtual" port bindings that have -"virtual_parent" NULL. 
- -Reported-at: https://bugzilla.redhat.com/1818844 -CC: Numan Siddique -Fixes: 054f4c85c413 ("Add a new logical switch port type - 'virtual'") -Signed-off-by: Dumitru Ceara -Signed-off-by: Numan Siddique -(cherry picked from upstream commit 5b3e9879be2b6c9b07ed5c9e073f1c24080a49f7) - -Change-Id: I10c2a8dd3731b34f606c4fa2db42711c81f431cc ---- - controller/binding.c | 26 +++++++++++++++----------- - tests/ovn.at | 18 ++++++++++++++++++ - 2 files changed, 33 insertions(+), 11 deletions(-) - -diff --git a/controller/binding.c b/controller/binding.c -index c3376e2..5ea12a8 100644 ---- a/controller/binding.c -+++ b/controller/binding.c -@@ -625,22 +625,26 @@ consider_local_virtual_port(struct ovsdb_idl_index *sbrec_port_binding_by_name, - const struct sbrec_chassis *chassis_rec, - const struct sbrec_port_binding *binding_rec) - { -+ if (binding_rec->virtual_parent) { -+ const struct sbrec_port_binding *parent = -+ lport_lookup_by_name(sbrec_port_binding_by_name, -+ binding_rec->virtual_parent); -+ if (parent && parent->chassis == chassis_rec) { -+ return; -+ } -+ } -+ - /* pinctrl module takes care of binding the ports of type 'virtual'. - * Release such ports if their virtual parents are no longer claimed by - * this chassis. 
- */ -- const struct sbrec_port_binding *parent = -- lport_lookup_by_name(sbrec_port_binding_by_name, -- binding_rec->virtual_parent); -- if (!parent || parent->chassis != chassis_rec) { -- VLOG_INFO("Releasing lport %s from this chassis.", -- binding_rec->logical_port); -- if (binding_rec->encap) { -- sbrec_port_binding_set_encap(binding_rec, NULL); -- } -- sbrec_port_binding_set_chassis(binding_rec, NULL); -- sbrec_port_binding_set_virtual_parent(binding_rec, NULL); -+ VLOG_INFO("Releasing lport %s from this chassis.", -+ binding_rec->logical_port); -+ if (binding_rec->encap) { -+ sbrec_port_binding_set_encap(binding_rec, NULL); - } -+ sbrec_port_binding_set_chassis(binding_rec, NULL); -+ sbrec_port_binding_set_virtual_parent(binding_rec, NULL); - } - - static void -diff --git a/tests/ovn.at b/tests/ovn.at -index 9a44f0a..1402fae 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -15007,6 +15007,24 @@ AT_CHECK([cat lflows.txt], [0], [dnl - table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) - ]) - -+# Forcibly clear virtual_parent. ovn-controller should release the binding -+# gracefully. -+pb_uuid=$(ovn-sbctl --bare --columns _uuid find port_binding logical_port=sw0-vir) -+ovn-sbctl clear port_binding $pb_uuid virtual_parent -+ -+OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns chassis find port_binding \ -+logical_port=sw0-vir) = x]) -+ -+# From sw0-p0 resend GARP for 10.0.0.10. hv1 should reclaim sw0-vir -+# and sw0-p1 should be its virtual_parent. -+send_garp 1 1 $eth_src $eth_dst $spa $tpa -+ -+OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns chassis find port_binding \ -+logical_port=sw0-vir) = x$hv1_ch_uuid], [0], []) -+ -+AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ -+logical_port=sw0-vir) = xsw0-p1]) -+ - # From sw0-p3 send GARP for 10.0.0.10. hv1 should claim sw0-vir - # and sw0-p3 should be its virtual_parent. 
- eth_src=505400000005 --- -1.8.3.1 - diff --git a/SOURCES/0001-ovn-controller-Skip-vport-bindings-done-through-OVS-.patch b/SOURCES/0001-ovn-controller-Skip-vport-bindings-done-through-OVS-.patch deleted file mode 100644 index 85088c5..0000000 --- a/SOURCES/0001-ovn-controller-Skip-vport-bindings-done-through-OVS-.patch +++ /dev/null @@ -1,90 +0,0 @@ -From a8acc52e37d5a74487b0a787bf8a519debc3a031 Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Thu, 2 Apr 2020 10:35:32 +0200 -Subject: [PATCH] ovn-controller: Skip vport bindings done through OVS - external_ids:iface-id. - -Port bindings of type "virtual" should not have an associated OVS port -in the integration bridge. If this is the case, it's a misconfig and -ovn-controller should ignore it. - -If such a situation is detected, ovn-controller will also log a warning -message to inform the user about the wrong configuration. - -Reported-at: https://bugzilla.redhat.com/1818844 -CC: Numan Siddique -Fixes: 054f4c85c413 ("Add a new logical switch port type - 'virtual'") -Signed-off-by: Dumitru Ceara -Signed-off-by: Numan Siddique -(cherry picked from upstream commit 523b1f5f45682bd6dd454281a97a09c3f429c457) - -Change-Id: Ie35818921a6e67c637feaea3be41c59880bb1b96 ---- - controller/binding.c | 12 ++++++++++++ - tests/ovn.at | 20 ++++++++++++++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/controller/binding.c b/controller/binding.c -index 5ea12a8..20a89d0 100644 ---- a/controller/binding.c -+++ b/controller/binding.c -@@ -447,6 +447,18 @@ is_our_chassis(const struct sbrec_chassis *chassis_rec, - const struct ovsrec_interface *iface_rec - = shash_find_data(lport_to_iface, binding_rec->logical_port); - -+ /* Ports of type "virtual" should never be explicitly bound to an OVS -+ * port in the integration bridge. If that's the case, ignore the binding -+ * and log a warning. 
-+ */ -+ if (iface_rec && !strcmp(binding_rec->type, "virtual")) { -+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); -+ VLOG_WARN_RL(&rl, -+ "Virtual port %s should not be bound to OVS port %s", -+ binding_rec->logical_port, iface_rec->name); -+ return false; -+ } -+ - bool our_chassis = false; - if (iface_rec - || (binding_rec->parent_port && binding_rec->parent_port[0] && -diff --git a/tests/ovn.at b/tests/ovn.at -index 0135838..e8554f6 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -14894,6 +14894,11 @@ ovs-vsctl -- add-port br-int hv1-vif2 -- \ - options:tx_pcap=hv1/vif2-tx.pcap \ - options:rxq_pcap=hv1/vif2-rx.pcap \ - ofport-request=2 -+ovs-vsctl -- add-port br-int hv1-vif3 -- \ -+ set interface hv1-vif3 \ -+ options:tx_pcap=hv1/vif3-tx.pcap \ -+ options:rxq_pcap=hv1/vif3-rx.pcap \ -+ ofport-request=3 - - sim_add hv2 - as hv2 -@@ -14987,6 +14992,21 @@ logical_port=sw0-vir) = x], [0], []) - AT_CHECK([test x$(ovn-sbctl --bare --columns virtual_parent find port_binding \ - logical_port=sw0-vir) = x]) - -+# Try to bind sw0-vir directly to an OVS port. This should be ignored by -+# ovn-controller. -+as hv1 -+ovs-vsctl set interface hv1-vif3 external-ids:iface-id=sw0-vir -+ -+AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding \ -+logical_port=sw0-vir) = x], [0], []) -+ -+# Cleanup hv1-vif3. -+as hv1 -+ovs-vsctl del-port hv1-vif3 -+ -+AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding \ -+logical_port=sw0-vir) = x], [0], []) -+ - # From sw0-p0 send GARP for 10.0.0.10. hv1 should claim sw0-vir - # and sw0-p1 should be its virtual_parent. 
- eth_src=505400000003 --- -1.8.3.1 - diff --git a/SOURCES/0001-ovn-ctl-Provide-the-option-to-configure-inactive-pro.patch b/SOURCES/0001-ovn-ctl-Provide-the-option-to-configure-inactive-pro.patch deleted file mode 100644 index 8f7266e..0000000 --- a/SOURCES/0001-ovn-ctl-Provide-the-option-to-configure-inactive-pro.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 1a34ed1dee90ad3ae82d91725bf8f5e86cf007c6 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Mon, 17 Feb 2020 11:23:45 +0530 -Subject: [PATCH] ovn-ctl: Provide the option to configure inactive probe from - standby to active. - -Recently ovsdb-server supported an unixctl command - -ovsdb-server/set-active-ovsdb-server-probe-interval to configure inactive probe -interval from standby connection to the active. This patch provides the -option to configure this from ovn-ctl and the pacemaker OVN OCF script. - -Signed-off-by: Numan Siddique -Acked-by: Han Zhou ---- - utilities/ovn-ctl | 14 +++++++++++--- - utilities/ovn-ctl.8.xml | 8 ++++++-- - utilities/ovndb-servers.ocf | 23 +++++++++++++++++++---- - 3 files changed, 36 insertions(+), 9 deletions(-) - -diff --git a/utilities/ovn-ctl b/utilities/ovn-ctl -index c7cb42bc1..2a337ae27 100755 ---- a/utilities/ovn-ctl -+++ b/utilities/ovn-ctl -@@ -82,7 +82,8 @@ demote_xx_ovsdb () { - local sync_from_proto=$2 - local sync_from_port=$3 - local active_conf_file=$4 -- local ctl_file=$5 -+ local inactive_probe_to_active=$5 -+ local ctl_file=$6 - - if test ! 
-z "$sync_from_addr"; then - echo "$sync_from_proto:$sync_from_addr:$sync_from_port" > $active_conf_file -@@ -91,6 +92,7 @@ demote_xx_ovsdb () { - if test -e $active_conf_file; then - ovn-appctl -t $OVN_RUNDIR/$ctl_file ovsdb-server/set-active-ovsdb-server `cat $active_conf_file` - ovn-appctl -t $OVN_RUNDIR/$ctl_file ovsdb-server/connect-active-ovsdb-server -+ ovn-appctl -t $OVN_RUNDIR/$ctl_file ovsdb-server/set-active-ovsdb-server-probe-interval $inactive_probe_to_active - else - echo >&2 "$0: active server details not set" - exit 1 -@@ -99,12 +101,14 @@ demote_xx_ovsdb () { - - demote_ovnnb() { - demote_xx_ovsdb $DB_NB_SYNC_FROM_ADDR $DB_NB_SYNC_FROM_PROTO \ -- $DB_NB_SYNC_FROM_PORT $ovnnb_active_conf_file ovnnb_db.ctl -+ $DB_NB_SYNC_FROM_PORT $ovnnb_active_conf_file \ -+ $DB_NB_PROBE_INTERVAL_TO_ACTIVE ovnnb_db.ctl - } - - demote_ovnsb() { - demote_xx_ovsdb $DB_SB_SYNC_FROM_ADDR $DB_SB_SYNC_FROM_PROTO \ -- $DB_SB_SYNC_FROM_PORT $ovnsb_active_conf_file ovnsb_db.ctl -+ $DB_SB_SYNC_FROM_PORT $ovnsb_active_conf_file \ -+ $DB_SB_PROBE_INTERVAL_TO_ACTIVE ovnsb_db.ctl - } - - demote_ic_nb() { -@@ -642,6 +646,7 @@ set_defaults () { - DB_NB_SYNC_FROM_PROTO=tcp - DB_NB_SYNC_FROM_ADDR= - DB_NB_SYNC_FROM_PORT=6641 -+ DB_NB_PROBE_INTERVAL_TO_ACTIVE=60000 - - DB_SB_SOCK=$OVN_RUNDIR/ovnsb_db.sock - DB_SB_PID=$OVN_RUNDIR/ovnsb_db.pid -@@ -652,6 +657,7 @@ set_defaults () { - DB_SB_SYNC_FROM_PROTO=tcp - DB_SB_SYNC_FROM_ADDR= - DB_SB_SYNC_FROM_PORT=6642 -+ DB_SB_PROBE_INTERVAL_TO_ACTIVE=60000 - - DB_IC_NB_SOCK=$OVN_RUNDIR/ovn_ic_nb_db.sock - DB_IC_NB_PID=$OVN_RUNDIR/ovn_ic_nb_db.pid -@@ -923,10 +929,12 @@ File location options: - --db-nb-sync-from-port=PORT OVN Northbound active db tcp port (default: $DB_NB_SYNC_FROM_PORT) - --db-nb-sync-from-proto=PROTO OVN Northbound active db transport (default: $DB_NB_SYNC_FROM_PROTO) - --db-nb-create-insecure-remote=yes|no Create ptcp OVN Northbound remote (default: $DB_NB_CREATE_INSECURE_REMOTE) -+ --db-nb-probe-interval-to-active Active 
probe interval from standby to active ovsdb-server remote (default: $DB_NB_PROBE_INTERVAL_TO_ACTIVE) - --db-sb-sync-from-addr=ADDR OVN Southbound active db tcp address (default: $DB_SB_SYNC_FROM_ADDR) - --db-sb-sync-from-port=ADDR OVN Southbound active db tcp port (default: $DB_SB_SYNC_FROM_PORT) - --db-sb-sync-from-proto=PROTO OVN Southbound active db transport (default: $DB_SB_SYNC_FROM_PROTO) - --db-sb-create-insecure-remote=yes|no Create ptcp OVN Southbound remote (default: $DB_SB_CREATE_INSECURE_REMOTE) -+ --db-sb-probe-interval-to-active Active probe interval from standby to active ovsdb-server remote (default: $DB_SB_PROBE_INTERVAL_TO_ACTIVE) - --db-nb-cluster-local-addr=ADDR OVN_Northbound cluster local address \ - (default: $DB_NB_CLUSTER_LOCAL_ADDR) - --db-nb-cluster-local-port=PORT OVN_Northbound cluster local tcp port \ -diff --git a/utilities/ovn-ctl.8.xml b/utilities/ovn-ctl.8.xml -index 816701379..f5b7f7aeb 100644 ---- a/utilities/ovn-ctl.8.xml -+++ b/utilities/ovn-ctl.8.xml -@@ -150,6 +150,10 @@ -

    --db-ic-sb-cluster-remote-port=PORT NUMBER

    -

    --db-ic-sb-cluster-remote-proto=PROTO (tcp/ssl)

    - -+

    Probe interval options

    -+

    --db-nb-probe-interval-to-active=Time in milliseconds

    -+

    --db-sb-probe-interval-to-active=Time in milliseconds

    -+ -

    Configuration files

    -

    Following are the optional configuration files. If present, it should be located in the etc dir

    - -@@ -241,8 +245,8 @@ -

    Promote and demote ovsdb servers

    -

    # ovn-ctl promote_ovnnb

    -

    # ovn-ctl promote_ovnsb

    --

    # ovn-ctl --db-nb-sync-from-addr=x.x.x.x --db-nb-sync-from-port=6641 demote_ovnnb

    --

    # ovn-ctl --db-sb-sync-from-addr=x.x.x.x --db-sb-sync-from-port=6642 demote_ovnsb

    -+

    # ovn-ctl --db-nb-sync-from-addr=x.x.x.x --db-nb-sync-from-port=6641 --db-nb-probe-interval-to-active=60000 demote_ovnnb

    -+

    # ovn-ctl --db-sb-sync-from-addr=x.x.x.x --db-sb-sync-from-port=6642 --db-sb-probe-interval-to-active=60000 demote_ovnsb

    - -

    Creating a clustered db on 3 nodes with IPs x.x.x.x, y.y.y.y and z.z.z.z

    -

    Starting OVN ovsdb servers and ovn-northd on the node with IP x.x.x.x

    -diff --git a/utilities/ovndb-servers.ocf b/utilities/ovndb-servers.ocf -index 42e0412ad..56c2bc322 100755 ---- a/utilities/ovndb-servers.ocf -+++ b/utilities/ovndb-servers.ocf -@@ -9,6 +9,7 @@ - : ${SB_MASTER_PROTO_DEFAULT="tcp"} - : ${MANAGE_NORTHD_DEFAULT="no"} - : ${INACTIVE_PROBE_DEFAULT="5000"} -+: ${INACTIVE_PROBE_TO_MASTER_DEFAULT="60000"} - : ${LISTEN_ON_MASTER_IP_ONLY_DEFAULT="yes"} - : ${NB_SSL_KEY_DEFAULT="/etc/openvswitch/ovnnb-privkey.pem"} - : ${NB_SSL_CERT_DEFAULT="/etc/openvswitch/ovnnb-cert.pem"} -@@ -27,6 +28,7 @@ SB_MASTER_PORT=${OCF_RESKEY_sb_master_port:-${SB_MASTER_PORT_DEFAULT}} - SB_MASTER_PROTO=${OCF_RESKEY_sb_master_protocol:-${SB_MASTER_PROTO_DEFAULT}} - MANAGE_NORTHD=${OCF_RESKEY_manage_northd:-${MANAGE_NORTHD_DEFAULT}} - INACTIVE_PROBE=${OCF_RESKEY_inactive_probe_interval:-${INACTIVE_PROBE_DEFAULT}} -+INACTIVE_PROBE_TO_MASTER=${OCF_RESKEY_inactive_probe_interval_to_master:-${INACTIVE_PROBE_TO_MASTER_DEFAULT}} - NB_PRIVKEY=${OCF_RESKEY_ovn_nb_db_privkey:-${NB_SSL_KEY_DEFAULT}} - NB_CERT=${OCF_RESKEY_ovn_nb_db_cert:-${NB_SSL_CERT_DEFAULT}} - NB_CACERT=${OCF_RESKEY_ovn_nb_db_cacert:-${NB_SSL_CACERT_DEFAULT}} -@@ -135,6 +137,15 @@ ovsdb_server_metadata() { - - - -+ -+ -+ Inactive probe interval to use for the connection from standby -+ ovsdb-server to master ovsdb-server. -+ -+ Set inactive probe interval to master -+ -+ -+ - - - If set to yes, the OVNDBs will listen on master IP. Otherwise, it will -@@ -266,10 +277,12 @@ inactivity_probe=$INACTIVE_PROBE -- set SB_Global . 
connections=@conn_uuid - ocf_log debug "ovndb_server: Connecting to the new master ${OCF_RESKEY_CRM_meta_notify_promote_uname}" - ${OVN_CTL} demote_ovnnb --db-nb-sync-from-addr=${MASTER_IP} \ - --db-nb-sync-from-port=${NB_MASTER_PORT} \ -- --db-nb-sync-from-proto=${NB_MASTER_PROTO} -+ --db-nb-sync-from-proto=${NB_MASTER_PROTO} \ -+ --db-nb-probe-interval-to-active=${INACTIVE_PROBE_TO_MASTER} - ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${MASTER_IP} \ - --db-sb-sync-from-port=${SB_MASTER_PORT} \ -- --db-sb-sync-from-proto=${SB_MASTER_PROTO} -+ --db-sb-sync-from-proto=${SB_MASTER_PROTO} \ -+ --db-sb-probe-interval-to-active=${INACTIVE_PROBE_TO_MASTER} - fi - } - -@@ -596,10 +609,12 @@ ovsdb_server_demote() { - # being demoted. Sync to the surviving one - ${OVN_CTL} demote_ovnnb --db-nb-sync-from-addr=${MASTER_IP} \ - --db-nb-sync-from-port=${NB_MASTER_PORT} \ -- --db-nb-sync-from-proto=${NB_MASTER_PROTO} -+ --db-nb-sync-from-proto=${NB_MASTER_PROTO} \ -+ --db-nb-probe-interval-to-active=${INACTIVE_PROBE_TO_MASTER} - ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${MASTER_IP} \ - --db-sb-sync-from-port=${SB_MASTER_PORT} \ -- --db-sb-sync-from-proto=${SB_MASTER_PROTO} -+ --db-sb-sync-from-proto=${SB_MASTER_PROTO} \ -+ --db-sb-probe-interval-to-active=${INACTIVE_PROBE_TO_MASTER} - - else - # For completeness, should never be called --- -2.25.1 - diff --git a/SOURCES/0001-ovn-ctl-introduce-ovsdb-n-s-b-wrapper-options.patch b/SOURCES/0001-ovn-ctl-introduce-ovsdb-n-s-b-wrapper-options.patch new file mode 100644 index 0000000..c2fe3b1 --- /dev/null +++ b/SOURCES/0001-ovn-ctl-introduce-ovsdb-n-s-b-wrapper-options.patch @@ -0,0 +1,175 @@ +From 1060d922a0fee3c9795eb58d5799b235ae406bc6 Mon Sep 17 00:00:00 2001 +Message-Id: <1060d922a0fee3c9795eb58d5799b235ae406bc6.1599568836.git.lorenzo.bianconi@redhat.com> +From: Lorenzo Bianconi +Date: Fri, 21 Aug 2020 15:40:13 +0200 +Subject: [PATCH] ovn-ctl: introduce ovsdb-{n, s}b-wrapper options + +ovn-ctl has the following options to 
run ovn-northd, ovn-controller or +ovn-ic under strace or valgrind wrappers. + + --ovn-northd-wrapper + --ovn-controller-wrapper + --ovn-ic-wrapper + +Introduce --ovsdb-nb-wrapper and --ovsdb-sb-wrapper to do the same for +ovsdb processes for ovn-{nb,sb} dbs + +Tested-by: Dumitru Ceara +Signed-off-by: Lorenzo Bianconi +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique +--- + utilities/ovn-ctl | 18 ++++++++++----- + utilities/ovn-ctl.8.xml | 2 ++ + utilities/ovn-lib.in | 49 ++++++++++++++++++++++++----------------- + 3 files changed, 43 insertions(+), 26 deletions(-) + +--- a/utilities/ovn-ctl ++++ b/utilities/ovn-ctl +@@ -145,7 +145,7 @@ promote_ic_sb() { + } + + start_ovsdb__() { +- local DB=$1 db=$2 schema_name=$3 table_name=$4 ++ local DB=$1 db=$2 schema_name=$3 table_name=$4 wrapper=$5 + local db_pid_file + local cluster_local_addr + local cluster_local_port +@@ -288,7 +288,7 @@ $cluster_remote_port + set "$@" --sync-from=`cat $active_conf_file` + fi + +- "$@" "$file" ++ start_wrapped_daemon "$wrapper" ovsdb-$db "" "$@" "$file" + + # Initialize the database if it's NOT joining a cluster. 
+ if test -z "$cluster_remote_addr"; then +@@ -301,7 +301,7 @@ $cluster_remote_port + } + + start_nb_ovsdb() { +- start_ovsdb__ NB nb OVN_Northbound NB_Global ++ start_ovsdb__ NB nb OVN_Northbound NB_Global "$OVSDB_NB_WRAPPER" + } + + start_sb_ovsdb() { +@@ -313,7 +313,7 @@ start_sb_ovsdb() { + ulimit -n $MAXFD + fi + +- start_ovsdb__ SB sb OVN_Southbound SB_Global ++ start_ovsdb__ SB sb OVN_Southbound SB_Global "$OVSDB_SB_WRAPPER" + } + + start_ovsdb () { +@@ -322,11 +322,13 @@ start_ovsdb () { + } + + start_ic_nb_ovsdb() { +- start_ovsdb__ IC_NB ic_nb OVN_IC_Northbound IC_NB_Global ++ start_ovsdb__ IC_NB ic_nb OVN_IC_Northbound IC_NB_Global \ ++ "$OVSDB_NB_WRAPPER" + } + + start_ic_sb_ovsdb() { +- start_ovsdb__ IC_SB ic_sb OVN_IC_Southbound IC_SB_Global ++ start_ovsdb__ IC_SB ic_sb OVN_IC_Southbound IC_SB_Global \ ++ "$OVSDB_SB_WRAPPER" + } + + start_ic_ovsdb () { +@@ -692,6 +694,8 @@ set_defaults () { + OVN_IC_WRAPPER= + OVN_CONTROLLER_PRIORITY=-10 + OVN_CONTROLLER_WRAPPER= ++ OVSDB_NB_WRAPPER= ++ OVSDB_SB_WRAPPER= + + OVN_USER= + +@@ -908,6 +912,8 @@ Options: + --ovn-ic-sb-db-ssl-ca-cert=CERT OVN IC Southbound DB SSL CA certificate file + --ovn-user="user[:group]" pass the --user flag to the ovn daemons + --ovs-user="user[:group]" pass the --user flag to ovs daemons ++ --ovsdb-nb-wrapper=WRAPPER run with a wrapper like valgrind for debugging ++ --ovsdb-sb-wrapper=WRAPPER run with a wrapper like valgrind for debugging + -h, --help display this help message + + File location options: +--- a/utilities/ovn-ctl.8.xml ++++ b/utilities/ovn-ctl.8.xml +@@ -64,6 +64,8 @@ +

    --ovn-controller-wrapper=WRAPPER

    +

    --ovn-ic-priority=NICE

    +

    --ovn-ic-wrapper=WRAPPER

    ++

    --ovsdb-nb-wrapper=WRAPPER

    ++

    --ovsdb-sb-wrapper=WRAPPER

    +

    --ovn-user=USER:GROUP

    +

    --ovs-user=USER:GROUP

    +

    -h | --help

    +--- a/utilities/ovn-lib.in ++++ b/utilities/ovn-lib.in +@@ -59,27 +59,12 @@ ovn_install_dir () { + fi + } + +-start_ovn_daemon () { +- priority=$1 +- wrapper=$2 +- shift; shift +- daemon=$1 ++start_wrapped_daemon() { ++ wrapper=$1 ++ daemon=$2 ++ priority=$3 + strace="" +- +- # drop core files in a sensible place +- ovn_install_dir "$DAEMON_CWD" +- set "$@" --no-chdir +- cd "$DAEMON_CWD" +- +- # log file +- ovn_install_dir "$ovn_logdir" "750" +- set "$@" --log-file="$ovn_logdir/$daemon.log" +- +- # pidfile and monitoring +- ovn_install_dir "$ovn_rundir" +- set "$@" --pidfile="$ovn_rundir/$daemon.pid" +- set "$@" --detach +- test X"$MONITOR" = Xno || set "$@" --monitor ++ shift ; shift ; shift ; + + # wrapper + case $wrapper in +@@ -127,6 +112,30 @@ start_ovn_daemon () { + fi + } + ++start_ovn_daemon () { ++ priority=$1 ++ wrapper=$2 ++ shift; shift ++ daemon=$1 ++ ++ # drop core files in a sensible place ++ ovn_install_dir "$DAEMON_CWD" ++ set "$@" --no-chdir ++ cd "$DAEMON_CWD" ++ ++ # log file ++ ovn_install_dir "$ovn_logdir" "750" ++ set "$@" --log-file="$ovn_logdir/$daemon.log" ++ ++ # pidfile and monitoring ++ ovn_install_dir "$ovn_rundir" ++ set "$@" --pidfile="$ovn_rundir/$daemon.pid" ++ set "$@" --detach ++ test X"$MONITOR" = Xno || set "$@" --monitor ++ ++ start_wrapped_daemon "$wrapper" $daemon "$priority" "$@" ++} ++ + stop_ovn_daemon () { + if test -e "$ovn_rundir/$1.pid"; then + if pid=`cat "$ovn_rundir/$1.pid"`; then diff --git a/SOURCES/0001-ovn-nbctl-Create-daemon-control-socket-in-ovn-run-di.patch b/SOURCES/0001-ovn-nbctl-Create-daemon-control-socket-in-ovn-run-di.patch deleted file mode 100644 index 999a5dc..0000000 --- a/SOURCES/0001-ovn-nbctl-Create-daemon-control-socket-in-ovn-run-di.patch +++ /dev/null @@ -1,180 +0,0 @@ -From 0319176e1d1fa741868d822100d0dde89a585ca3 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Wed, 8 Apr 2020 20:16:49 +0530 -Subject: [PATCH] ovn-nbctl: Create daemon control socket in ovn run dir - -ovn-nbctl when 
run as a daemon is creating the ctl socket in -the ovs rundir. This patch fixes this issue by creating it in -the ovn rundir. - -When an ovn service is run with -u option (which specifies the -ctl socket path) and if this path is not absolute, the ovn -ctl socket path is created in the ovs run dir. This patch -also fixes this issue by creating it in the ovn run dir. - -Reported-by: Dan Williams -Acked-by: Dumitru Ceara -Signed-off-by: Numan Siddique - -(cherry-picked from upstream branch-20.03 commit 29927708242044696a49051c77c3d4b38ba02392) - -Change-Id: I17b3ab45f591fa9793ae8d95cf78c2113a1e4d65 ---- - controller-vtep/ovn-controller-vtep.c | 5 ++++- - controller/ovn-controller.c | 2 +- - ic/ovn-ic.c | 10 +++------- - lib/ovn-util.c | 9 +++++---- - lib/ovn-util.h | 2 +- - northd/ovn-northd.c | 10 +++------- - utilities/ovn-nbctl.c | 6 +++++- - utilities/ovn-trace.c | 6 +++++- - 8 files changed, 27 insertions(+), 23 deletions(-) - -diff --git a/controller-vtep/ovn-controller-vtep.c b/controller-vtep/ovn-controller-vtep.c -index b30a731d4..253a709ab 100644 ---- a/controller-vtep/ovn-controller-vtep.c -+++ b/controller-vtep/ovn-controller-vtep.c -@@ -67,7 +67,10 @@ main(int argc, char *argv[]) - - daemonize_start(false); - -- retval = unixctl_server_create(NULL, &unixctl); -+ char *abs_unixctl_path = get_abs_unix_ctl_path(NULL); -+ retval = unixctl_server_create(abs_unixctl_path, &unixctl); -+ free(abs_unixctl_path); -+ - if (retval) { - exit(EXIT_FAILURE); - } -diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c -index 2893eaac1..4d21ba0fd 100644 ---- a/controller/ovn-controller.c -+++ b/controller/ovn-controller.c -@@ -1729,7 +1729,7 @@ main(int argc, char *argv[]) - - daemonize_start(true); - -- char *abs_unixctl_path = get_abs_unix_ctl_path(); -+ char *abs_unixctl_path = get_abs_unix_ctl_path(NULL); - retval = unixctl_server_create(abs_unixctl_path, &unixctl); - free(abs_unixctl_path); - if (retval) { -diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c 
-index bf8205de2..d931ca50f 100644 ---- a/ic/ovn-ic.c -+++ b/ic/ovn-ic.c -@@ -1575,13 +1575,9 @@ main(int argc, char *argv[]) - - daemonize_start(false); - -- if (!unixctl_path) { -- char *abs_unixctl_path = get_abs_unix_ctl_path(); -- retval = unixctl_server_create(abs_unixctl_path, &unixctl); -- free(abs_unixctl_path); -- } else { -- retval = unixctl_server_create(unixctl_path, &unixctl); -- } -+ char *abs_unixctl_path = get_abs_unix_ctl_path(unixctl_path); -+ retval = unixctl_server_create(abs_unixctl_path, &unixctl); -+ free(abs_unixctl_path); - - if (retval) { - exit(EXIT_FAILURE); -diff --git a/lib/ovn-util.c b/lib/ovn-util.c -index df18fda89..514e2489f 100644 ---- a/lib/ovn-util.c -+++ b/lib/ovn-util.c -@@ -377,7 +377,7 @@ default_ic_sb_db(void) - } - - char * --get_abs_unix_ctl_path(void) -+get_abs_unix_ctl_path(const char *path) - { - #ifdef _WIN32 - enum { WINDOWS = 1 }; -@@ -386,9 +386,10 @@ get_abs_unix_ctl_path(void) - #endif - - long int pid = getpid(); -- char *abs_path = -- WINDOWS ? xasprintf("%s/%s.ctl", ovn_rundir(), program_name) -- : xasprintf("%s/%s.%ld.ctl", ovn_rundir(), program_name, pid); -+ char *abs_path -+ = (path ? abs_file_name(ovn_rundir(), path) -+ : WINDOWS ? 
xasprintf("%s/%s.ctl", ovn_rundir(), program_name) -+ : xasprintf("%s/%s.%ld.ctl", ovn_rundir(), program_name, pid)); - return abs_path; - } - -diff --git a/lib/ovn-util.h b/lib/ovn-util.h -index 32c8334b0..11238f61c 100644 ---- a/lib/ovn-util.h -+++ b/lib/ovn-util.h -@@ -82,7 +82,7 @@ const char *default_nb_db(void); - const char *default_sb_db(void); - const char *default_ic_nb_db(void); - const char *default_ic_sb_db(void); --char *get_abs_unix_ctl_path(void); -+char *get_abs_unix_ctl_path(const char *path); - - struct ovsdb_idl_table_class; - const char *db_table_usage(struct ds *tables, -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index fd1be5b27..1f1238d23 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -11673,13 +11673,9 @@ main(int argc, char *argv[]) - - daemonize_start(false); - -- if (!unixctl_path) { -- char *abs_unixctl_path = get_abs_unix_ctl_path(); -- retval = unixctl_server_create(abs_unixctl_path, &unixctl); -- free(abs_unixctl_path); -- } else { -- retval = unixctl_server_create(unixctl_path, &unixctl); -- } -+ char *abs_unixctl_path = get_abs_unix_ctl_path(unixctl_path); -+ retval = unixctl_server_create(abs_unixctl_path, &unixctl); -+ free(abs_unixctl_path); - - if (retval) { - exit(EXIT_FAILURE); -diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c -index 59abe0051..a88c1ddc2 100644 ---- a/utilities/ovn-nbctl.c -+++ b/utilities/ovn-nbctl.c -@@ -6436,7 +6436,11 @@ server_loop(struct ovsdb_idl *idl, int argc, char *argv[]) - - service_start(&argc, &argv); - daemonize_start(false); -- int error = unixctl_server_create(unixctl_path, &server); -+ -+ char *abs_unixctl_path = get_abs_unix_ctl_path(unixctl_path); -+ int error = unixctl_server_create(abs_unixctl_path, &server); -+ free(abs_unixctl_path); -+ - if (error) { - ctl_fatal("failed to create unixctl server (%s)", - ovs_retval_to_string(error)); -diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index e59698ec4..eae9622d3 100644 ---- 
a/utilities/ovn-trace.c -+++ b/utilities/ovn-trace.c -@@ -125,7 +125,11 @@ main(int argc, char *argv[]) - bool exiting = false; - if (get_detach()) { - daemonize_start(false); -- int error = unixctl_server_create(unixctl_path, &server); -+ -+ char *abs_unixctl_path = get_abs_unix_ctl_path(unixctl_path); -+ int error = unixctl_server_create(abs_unixctl_path, &server); -+ free(abs_unixctl_path); -+ - if (error) { - ovs_fatal(error, "failed to create unixctl server"); - } --- -2.25.1 - diff --git a/SOURCES/0001-ovn-nbctl-Deal-with-nb_cfg-overflows.patch b/SOURCES/0001-ovn-nbctl-Deal-with-nb_cfg-overflows.patch new file mode 100644 index 0000000..c1a1dae --- /dev/null +++ b/SOURCES/0001-ovn-nbctl-Deal-with-nb_cfg-overflows.patch @@ -0,0 +1,38 @@ +From 9cb2f23877fec13693f27986e4d075152f318d0f Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Fri, 28 Aug 2020 14:10:54 +0200 +Subject: [PATCH] ovn-nbctl: Deal with nb_cfg overflows. + +Reported-at: https://bugzilla.redhat.com/id=1873455 +Reported-by: Ying Xu +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from master commit be3a60f8e6a3a01e08ae6fe760279dc4f274562a) + +(cherry picked from upstream commit 49f322806785cf1195dfa8cfb63531cd1c119e5b) + +Change-Id: Iba7df83c1d8b88a70fff3c69b985ef46db8d18c4 +--- + utilities/ovn-nbctl.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index e6d8dbe..bd66ee6 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -6080,6 +6080,11 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + nb = nbrec_nb_global_insert(txn); + } + ++ /* Deal with potential overflows. 
*/ ++ if (nb->nb_cfg == LLONG_MAX) { ++ nbrec_nb_global_set_nb_cfg(nb, 0); ++ } ++ + if (wait_type != NBCTL_WAIT_NONE) { + ovsdb_idl_txn_increment(txn, &nb->header_, &nbrec_nb_global_col_nb_cfg, + force_wait); +-- +1.8.3.1 + diff --git a/SOURCES/0001-ovn-northd-Add-lflows-to-by-pass-the-svc-monitor-pac.patch b/SOURCES/0001-ovn-northd-Add-lflows-to-by-pass-the-svc-monitor-pac.patch deleted file mode 100644 index 5e35cab..0000000 --- a/SOURCES/0001-ovn-northd-Add-lflows-to-by-pass-the-svc-monitor-pac.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 8e0b2c54726c666db7163dd18673683dfd06d89c Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Thu, 12 Mar 2020 15:58:38 +0530 -Subject: [PATCH] ovn-northd: Add lflows to by pass the svc monitor packets - from conntrack. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The commit [1] added lflows to by pass the service monitor health check -packets from conntrack. But it missed out adding in the ingress pre_acl -and egress pre_acl of logical switch pipeline. - -This patch adds these missing lflows. It also enhanced the system lb health -check tests to add the acls to test this scenario. 
- -[1] - bb9f2b9ce56c("ovn-northd: Consider load balancer active backends in router pipeline) -Fixes: bb9f2b9ce56c("ovn-northd: Consider load balancer active backends in router pipeline) - -Reported-by: Maciej Józefczyk -Acked-by: Dumitru Ceara -Acked-by: Maciej Jozefczyk -Signed-off-by: Numan Siddique - -(cherry-picked from upstream branch-20.03 commit f70a0f7c485f13cbbae8bc6f8d78225238c308b9) - -Change-Id: I8980d306ea67c2aaa3bfa8f907a0f71a55fe0f9d ---- - northd/ovn-northd.8.xml | 22 +++++++++++++++++++++- - northd/ovn-northd.c | 15 ++++++++++++++- - tests/ovn.at | 22 ++++++++++++++++++++++ - tests/system-ovn.at | 22 ++++++++++++++++++++++ - 4 files changed, 79 insertions(+), 2 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index b6cfa3e90..9b44720d1 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -293,6 +293,16 @@ - priority-110 flow is added to skip over stateful ACLs. -

    - -+

    -+ This table also has a priority-110 flow with the match -+ eth.dst == E for all logical switch -+ datapaths to move traffic to the next table. Where E -+ is the service monitor mac defined in the -+ colum of table. -+

    -+ -

    Ingress Table 4: Pre-LB

    - -

    -@@ -320,7 +330,7 @@ - -

    - This table also has a priority-110 flow with the match -- eth.src == E for all logical switch -+ eth.dst == E for all logical switch - datapaths to move traffic to the next table. Where E - is the service monitor mac defined in the - to-lport traffic. -

    - -+

    -+ This table also has a priority-110 flow with the match -+ eth.src == E for all logical switch -+ datapaths to move traffic to the next table. Where E -+ is the service monitor mac defined in the -+ colum of table. -+

    -+ -

    Egress Table 2: Pre-stateful

    - -

    -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 217a8c894..3a77f2e3a 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -4601,6 +4601,16 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;"); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;"); - -+ char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); -+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, svc_check_match, -+ "next;"); -+ free(svc_check_match); -+ -+ svc_check_match = xasprintf("eth.src == %s", svc_monitor_mac); -+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, svc_check_match, -+ "next;"); -+ free(svc_check_match); -+ - /* If there are any stateful ACL rules in this datapath, we must - * send all IP packets through the conntrack action, which handles - * defragmentation, in order to match L4 headers. */ -@@ -4784,9 +4794,12 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, - "next;"); - - /* Do not send service monitor packets to conntrack. */ -- char *svc_check_match = xasprintf("eth.src == %s", svc_monitor_mac); -+ char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, - svc_check_match, "next;"); -+ free(svc_check_match); -+ -+ svc_check_match = xasprintf("eth.src == %s", svc_monitor_mac); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, - svc_check_match, "next;"); - free(svc_check_match); -diff --git a/tests/ovn.at b/tests/ovn.at -index 8de4b5ceb..8cdbad743 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -17739,12 +17739,34 @@ ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3" - ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" - ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4" - -+# Create port group and ACLs for sw0 ports. 
-+ovn-nbctl pg-add pg0_drop sw0-p1 sw0-p2 -+ovn-nbctl acl-add pg0_drop from-lport 1001 "inport == @pg0_drop && ip" drop -+ovn-nbctl acl-add pg0_drop to-lport 1001 "outport == @pg0_drop && ip" drop -+ -+ovn-nbctl pg-add pg0 sw0-p1 sw0-p2 -+ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 80" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 80" allow-related -+ - # Create the second logical switch with one port - ovn-nbctl ls-add sw1 - ovn-nbctl lsp-add sw1 sw1-p1 - ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" - ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" - -+# Create port group and ACLs for sw1 ports. -+ovn-nbctl pg-add pg1_drop sw1-p1 -+ovn-nbctl acl-add pg1_drop from-lport 1001 "inport == @pg1_drop && ip" drop -+ovn-nbctl acl-add pg1_drop to-lport 1001 "outport == @pg1_drop && ip" drop -+ -+ovn-nbctl pg-add pg1 sw1-p1 -+ovn-nbctl acl-add pg1 from-lport 1002 "inport == @pg1 && ip4" allow-related -+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related -+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 80" allow-related -+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 80" allow-related -+ - # Create a logical router and attach both logical switches - ovn-nbctl lr-add lr0 - ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 9ed3df754..3b3379840 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3314,12 +3314,34 @@ ovn-nbctl lsp-add sw0 sw0-p2 - ovn-nbctl 
lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" - ovn-nbctl lsp-set-port-security sw0-p2 "50:54:00:00:00:04 10.0.0.4" - -+# Create port group and ACLs for sw0 ports. -+ovn-nbctl pg-add pg0_drop sw0-p1 sw0-p2 -+ovn-nbctl acl-add pg0_drop from-lport 1001 "inport == @pg0_drop && ip" drop -+ovn-nbctl acl-add pg0_drop to-lport 1001 "outport == @pg0_drop && ip" drop -+ -+ovn-nbctl pg-add pg0 sw0-p1 sw0-p2 -+ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 80" allow-related -+ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 80" allow-related -+ - # Create the second logical switch with one port - ovn-nbctl ls-add sw1 - ovn-nbctl lsp-add sw1 sw1-p1 - ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" - ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" - -+# Create port group and ACLs for sw1 ports. 
-+ovn-nbctl pg-add pg1_drop sw1-p1 -+ovn-nbctl acl-add pg1_drop from-lport 1001 "inport == @pg1_drop && ip" drop -+ovn-nbctl acl-add pg1_drop to-lport 1001 "outport == @pg1_drop && ip" drop -+ -+ovn-nbctl pg-add pg1 sw1-p1 -+ovn-nbctl acl-add pg1 from-lport 1002 "inport == @pg1 && ip4" allow-related -+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == 0.0.0.0/0 && icmp4" allow-related -+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == 0.0.0.0/0 && tcp && tcp.dst == 80" allow-related -+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == 0.0.0.0/0 && udp && udp.dst == 80" allow-related -+ - # Create a logical router and attach both logical switches - ovn-nbctl lr-add lr0 - ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 --- -2.24.1 - diff --git a/SOURCES/0001-ovn-northd-Clear-SB-records-depending-on-stale-datap.patch b/SOURCES/0001-ovn-northd-Clear-SB-records-depending-on-stale-datap.patch deleted file mode 100644 index 3538655..0000000 --- a/SOURCES/0001-ovn-northd-Clear-SB-records-depending-on-stale-datap.patch +++ /dev/null @@ -1,189 +0,0 @@ -From e5b87cf915c0061355f9c4cdea0df1fe1c26cd38 Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Thu, 30 Apr 2020 20:32:17 +0200 -Subject: [PATCH 1/2] ovn-northd: Clear SB records depending on stale - datapaths. - -When purging stale SB Datapath_Binding records ovn-northd doesn't -properly clean records from other tables that might refer the -datapaths being deleted. - -One way to reproduce the issue is: -$ ovn-nbctl lr-add lr -$ ovn-nbctl lrp-add lr p 00:00:00:00:00:01 1.1.1.1/24 -$ ovn-nbctl --wait=sb sync -$ dp=$(ovn-sbctl --bare --columns _uuid list datapath .) 
-$ ovn-sbctl create mac_binding logical_port="p" ip="1.1.1.2" datapath="$dp" -$ ovn-nbctl lrp-del p -- lr-del lr -- \ - lr-add lr -- lrp-add lr p 00:00:00:00:00:01 1.1.1.1/24 - -Reported-by: Dan Williams -Reported-at: https://bugzilla.redhat.com/1828637 -Signed-off-by: Dumitru Ceara -Acked-by: Numan Siddique -Signed-off-by: Han Zhou -(cherry picked from upstream commit 6856adc616a7181723ce5201110cc95de1aba92b) - -Change-Id: I1cbcb5fc34927368e6655420126b2492c4fce9df ---- - northd/ovn-northd.c | 45 ++++++++++++++++++++++++++++++++------------- - tests/ovn-northd.at | 24 ++++++++++++++++++++++++ - 2 files changed, 56 insertions(+), 13 deletions(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 5ada3ae..5e649d0 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -634,6 +634,12 @@ ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) - return NULL; - } - -+static bool -+ovn_datapath_is_stale(const struct ovn_datapath *od) -+{ -+ return !od->nbr && !od->nbs; -+} -+ - static struct ovn_datapath * - ovn_datapath_from_sbrec(struct hmap *datapaths, - const struct sbrec_datapath_binding *sb) -@@ -3067,11 +3073,16 @@ ovn_port_update_sbrec(struct northd_context *ctx, - /* Remove mac_binding entries that refer to logical_ports which are - * deleted. 
*/ - static void --cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports) -+cleanup_mac_bindings(struct northd_context *ctx, struct hmap *datapaths, -+ struct hmap *ports) - { - const struct sbrec_mac_binding *b, *n; - SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) { -- if (!ovn_port_find(ports, b->logical_port)) { -+ const struct ovn_datapath *od = -+ ovn_datapath_from_sbrec(datapaths, b->datapath); -+ -+ if (!od || ovn_datapath_is_stale(od) || -+ !ovn_port_find(ports, b->logical_port)) { - sbrec_mac_binding_delete(b); - } - } -@@ -3439,6 +3450,9 @@ build_ports(struct northd_context *ctx, - join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues, - &tag_alloc_table, &sb_only, &nb_only, &both); - -+ /* Purge stale Mac_Bindings if ports are deleted. */ -+ bool remove_mac_bindings = !ovs_list_is_empty(&sb_only); -+ - struct ovn_port *op, *next; - /* For logical ports that are in both databases, index the in-use - * tunnel_keys. */ -@@ -3453,6 +3467,12 @@ build_ports(struct northd_context *ctx, - * For logical ports that are in NB database, do any tag allocation - * needed. */ - LIST_FOR_EACH_SAFE (op, next, list, &both) { -+ /* When reusing stale Port_Bindings, make sure that stale -+ * Mac_Bindings are purged. -+ */ -+ if (op->od->sb != op->sb->datapath) { -+ remove_mac_bindings = true; -+ } - if (op->nbsp) { - tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp); - } -@@ -3488,19 +3508,15 @@ build_ports(struct northd_context *ctx, - sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key); - } - -- bool remove_mac_bindings = false; -- if (!ovs_list_is_empty(&sb_only)) { -- remove_mac_bindings = true; -- } -- - /* Delete southbound records without northbound matches. 
*/ - LIST_FOR_EACH_SAFE(op, next, list, &sb_only) { - ovs_list_remove(&op->list); - sbrec_port_binding_delete(op->sb); - ovn_port_destroy(ports, op); - } -+ - if (remove_mac_bindings) { -- cleanup_mac_bindings(ctx, ports); -+ cleanup_mac_bindings(ctx, datapaths, ports); - } - - tag_alloc_destroy(&tag_alloc_table); -@@ -10258,7 +10274,8 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, - SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) { - struct ovn_datapath *od - = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath); -- if (!od) { -+ -+ if (!od || ovn_datapath_is_stale(od)) { - sbrec_logical_flow_delete(sbflow); - continue; - } -@@ -10318,7 +10335,8 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, - SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) { - struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths, - sbmc->datapath); -- if (!od) { -+ -+ if (!od || ovn_datapath_is_stale(od)) { - sbrec_multicast_group_delete(sbmc); - continue; - } -@@ -10800,8 +10818,8 @@ build_ip_mcast(struct northd_context *ctx, struct hmap *datapaths) - const struct sbrec_ip_multicast *sb, *sb_next; - - SBREC_IP_MULTICAST_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) { -- if (!sb->datapath || -- !ovn_datapath_from_sbrec(datapaths, sb->datapath)) { -+ od = ovn_datapath_from_sbrec(datapaths, sb->datapath); -+ if (!od || ovn_datapath_is_stale(od)) { - sbrec_ip_multicast_delete(sb); - } - } -@@ -10870,7 +10888,8 @@ build_mcast_groups(struct northd_context *ctx, - /* If the datapath value is stale, purge the group. 
*/ - struct ovn_datapath *od = - ovn_datapath_from_sbrec(datapaths, sb_igmp->datapath); -- if (!od) { -+ -+ if (!od || ovn_datapath_is_stale(od)) { - sbrec_igmp_group_delete(sb_igmp); - continue; - } -diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index d127152..94f892b 100644 ---- a/tests/ovn-northd.at -+++ b/tests/ovn-northd.at -@@ -1326,3 +1326,27 @@ AT_CHECK([test 0 = $(ovn-sbctl dump-flows lr0 | grep lr_in_unsnat | \ - grep "ip4 && ip4.dst == 192.168.2.6 && tcp && tcp.dst == 8080" -c) ]) - - AT_CLEANUP -+ -+AT_SETUP([ovn -- check reconcile stale Datapath_Binding]) -+ovn_start -+ -+ovn-nbctl lr-add lr -+ovn-nbctl lrp-add lr p 00:00:00:00:00:01 1.1.1.1/24 -+ -+AT_CHECK([ovn-nbctl --wait=sb sync], [0]) -+ -+# Create a MAC_Binding referring the router datapath. -+dp=$(ovn-sbctl --bare --columns _uuid list datapath .) -+ovn-sbctl create mac_binding logical_port="p" ip="1.1.1.2" datapath="$dp" -+ -+ovn-nbctl lrp-del p -- lr-del lr -- \ -+ lr-add lr -- lrp-add lr p 00:00:00:00:00:01 1.1.1.1/24 -+AT_CHECK([ovn-nbctl --wait=sb sync], [0]) -+ -+AT_CHECK([test 1 = $(ovn-sbctl --columns _uuid list Datapath_Binding | wc -l)]) -+ -+nb_uuid=$(ovn-sbctl get Datapath_Binding . external_ids:logical-router) -+lr_uuid=$(ovn-nbctl --columns _uuid list Logical_Router .) -+AT_CHECK[test ${nb_uuid} = ${lr_uuid}] -+ -+AT_CLEANUP --- -1.8.3.1 - diff --git a/SOURCES/0001-ovn-northd-Don-t-add-arp-responder-flows-for-lports-.patch b/SOURCES/0001-ovn-northd-Don-t-add-arp-responder-flows-for-lports-.patch deleted file mode 100644 index da692e0..0000000 --- a/SOURCES/0001-ovn-northd-Don-t-add-arp-responder-flows-for-lports-.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 5521da70830446373265999b6d994d986a02ce01 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Thu, 19 Mar 2020 16:52:17 +0530 -Subject: [PATCH] ovn-northd: Don't add arp responder flows for lports with - 'unknown' address. 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If a logical port has 'unknown' address, it means it can send and receive -packet with any IP and MAC and generally port security is not set for -such logical ports. If an lport has addresses set to - ["MAC1 IP1", unknown], -right now we add arp responder flows for IP1 and respond MAC1 in the arp -response. But it's possible that the VIF of the logical port can use the IP1 -with a different MAC. This patch supports this usecase. When another logical port -sends ARP request for IP1, the VIF of the logical port will anyway respond. - -Reported-by: Maciej Józefczyk -Acked-by: Han Zhou -Signed-off-by: Numan Siddique ---- - northd/ovn-northd.8.xml | 5 +++-- - northd/ovn-northd.c | 13 ++++++++----- - tests/ovn.at | 16 ++++++++++++---- - 3 files changed, 23 insertions(+), 11 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index 9b44720d1..7d03cbc83 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -699,8 +699,9 @@ output; - -

    - These flows are omitted for logical ports (other than router ports or -- localport ports) that are down and for logical ports of -- type virtual. -+ localport ports) that are down, for logical ports of -+ type virtual and for logical ports with 'unknown' -+ address set. -

    -
  • - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 3a77f2e3a..356c5436c 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -1148,7 +1148,7 @@ struct ovn_port { - - bool derived; /* Indicates whether this is an additional port - * derived from nbsp or nbrp. */ -- -+ bool has_unknown; /* If the addresses have 'unknown' defined. */ - /* The port's peer: - * - * - A switch port S of type "router" has a router port R as a peer, -@@ -2055,8 +2055,11 @@ join_logical_ports(struct northd_context *ctx, - op->lsp_addrs - = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses); - for (size_t j = 0; j < nbsp->n_addresses; j++) { -- if (!strcmp(nbsp->addresses[j], "unknown") -- || !strcmp(nbsp->addresses[j], "router")) { -+ if (!strcmp(nbsp->addresses[j], "unknown")) { -+ op->has_unknown = true; -+ continue; -+ } -+ if (!strcmp(nbsp->addresses[j], "router")) { - continue; - } - if (is_dynamic_lsp_address(nbsp->addresses[j])) { -@@ -6123,7 +6126,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, - } else { - /* - * Add ARP/ND reply flows if either the -- * - port is up or -+ * - port is up and it doesn't have 'unknown' address defined or - * - port type is router or - * - port type is localport - */ -@@ -6132,7 +6135,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, - continue; - } - -- if (lsp_is_external(op->nbsp)) { -+ if (lsp_is_external(op->nbsp) || op->has_unknown) { - continue; - } - -diff --git a/tests/ovn.at b/tests/ovn.at -index 8cdbad743..1b6073ff0 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -1758,11 +1758,13 @@ for is in 1 2 3; do - sip=`ip_to_hex 192 168 0 $is$js` - tip=`ip_to_hex 192 168 0 $id$jd` - tip_unknown=`ip_to_hex 11 11 11 11` -+ reply_ha=; - if test $d != $s; then -- reply_ha=f000000000$d -- else -- reply_ha= -+ if test $jd != 1; then -+ reply_ha=f000000000$d -+ fi - fi -+ - test_arp $s f000000000$s $sip $tip $reply_ha #9 - test_arp $s f000000000$s $sip $tip_unknown #10 - -@@ -2199,7 
+2201,13 @@ for s in 1 2 3; do - sip=192.168.0.$s - tip=192.168.0.$d - tip_unknown=11.11.11.11 -- if test $d != $s; then reply_ha=f0:00:00:00:00:0$d; else reply_ha=; fi -+ reply_ha=; -+ if test $d != $s; then -+ if test $d != 1; then -+ reply_ha=f0:00:00:00:00:0$d; -+ fi -+ fi -+ - test_arp $s f0:00:00:00:00:0$s $sip $tip $reply_ha #9 - test_arp $s f0:00:00:00:00:0$s $sip $tip_unknown #10 - --- -2.24.1 - diff --git a/SOURCES/0001-ovn-northd-Fix-leak-of-lport-addresses-during-DHCPv6.patch b/SOURCES/0001-ovn-northd-Fix-leak-of-lport-addresses-during-DHCPv6.patch deleted file mode 100644 index 7d1d91b..0000000 --- a/SOURCES/0001-ovn-northd-Fix-leak-of-lport-addresses-during-DHCPv6.patch +++ /dev/null @@ -1,62 +0,0 @@ -From c0d305f985cb1a8533950d4f17107d9a71635644 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets -Date: Tue, 12 May 2020 12:44:06 +0200 -Subject: [PATCH 1/2] ovn-northd: Fix leak of lport addresses during DHCPv6 - reply handling. - -'lrp_networks' never destroyed but constantly overwritten in a loop that -handles DHCPv6 replies. In some cases this point leaks several MB per -minute making ovn-northd to constantly growing its memory consumption: - - 399,820,764 bytes in 1,885,947 blocks are definitely lost in loss record 182 of 182 - at 0x4839748: malloc (vg_replace_malloc.c:308) - by 0x483BD63: realloc (vg_replace_malloc.c:836) - by 0x1E7BF8: xrealloc (util.c:149) - by 0x152723: add_ipv6_netaddr.isra.0 (ovn-util.c:55) - by 0x152F1C: extract_lrp_networks (ovn-util.c:275) - by 0x142EE2: build_lrouter_flows (ovn-northd.c:8607) - by 0x142EE2: build_lflows.isra.0 (ovn-northd.c:10296) - by 0x14E4F8: ovnnb_db_run (ovn-northd.c:11128) - by 0x14E4F8: ovn_db_run (ovn-northd.c:11672) - by 0x13304D: main (ovn-northd.c:12035) - -In fact, there is no need to allocate this memory at all, since all the -required information is already available in 'op->lrp_networks'. 
- -Reported-by: Joe Talerico -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1827769 -Fixes: 5c1d2d230773 ("northd: Add logical flows for dhcpv6 pfd parsing") -Signed-off-by: Ilya Maximets -Acked-by: Mark Michelson -Signed-off-by: Numan Siddique ---- - northd/ovn-northd.c | 9 ++------- - 1 file changed, 2 insertions(+), 7 deletions(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index b07e68cfa..c1cdb2280 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -8603,17 +8603,12 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - continue; - } - -- struct lport_addresses lrp_networks; -- if (!extract_lrp_networks(op->nbrp, &lrp_networks)) { -- continue; -- } -- -- for (size_t i = 0; i < lrp_networks.n_ipv6_addrs; i++) { -+ for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - ds_clear(&actions); - ds_clear(&match); - ds_put_format(&match, "ip6.dst == %s && udp.src == 547 &&" - " udp.dst == 546", -- lrp_networks.ipv6_addrs[i].addr_s); -+ op->lrp_networks.ipv6_addrs[i].addr_s); - ds_put_format(&actions, "reg0 = 0; handle_dhcpv6_reply;"); - ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, - ds_cstr(&match), ds_cstr(&actions)); --- -2.26.2 - diff --git a/SOURCES/0001-ovn-northd-Fix-multiple-ARP-replies-for-SNAT-entries.patch b/SOURCES/0001-ovn-northd-Fix-multiple-ARP-replies-for-SNAT-entries.patch new file mode 100644 index 0000000..f370ad9 --- /dev/null +++ b/SOURCES/0001-ovn-northd-Fix-multiple-ARP-replies-for-SNAT-entries.patch @@ -0,0 +1,393 @@ +From eec06481841c053a00f04849a305b66b2d274f2a Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Sat, 12 Sep 2020 18:25:53 +0530 +Subject: [PATCH] ovn-northd: Fix multiple ARP replies for SNAT entries + configured on a distributed router. + +The commit in the Fixes tag, while addressing the issue to send ARP replies for +the SNAT entries, didn't take into account the gateway router port scenario. 
+Because of this, all the chassis which have bridge mappings configured reply +to ARP request for SNAT entries. This patch fixes the issue by adding +"is_chassis_resident()" condition for such flows so that only the gateway chassis +which claims the gateway router port responds to the ARP request. + +Note: This patch doesn't require any changes to ovn-northd.8.xml as it was already +documented with the desired flows for SNAT entries. + +Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050679.html +Reported-by: Chris +Fixes: e2aa124ff7c2("ovn-northd: Add ARP responder flows for SNAT entries.") +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique +--- + TODO.rst | 4 + + northd/ovn-northd.c | 14 ++- + tests/ovn-northd.at | 5 + + tests/ovn.at | 260 ++++++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 279 insertions(+), 4 deletions(-) + +diff --git a/TODO.rst b/TODO.rst +index 98b86cef8..53ca2057e 100644 +--- a/TODO.rst ++++ b/TODO.rst +@@ -25,6 +25,10 @@ + OVN To-do List + ============== + ++* Refactor ovn-northd code to have separate functions to add logical flows ++ for gateway logical routers and logical routers with distributed gateway ++ port. ++ + * Get incremental updates in ovn-controller and ovn-northd in some + sensible way. 
+ +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 63c58ca6a..e5e524ec3 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -8678,10 +8678,9 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + ext_addrs->ipv4_addrs[0].addr_s; + + if (!strcmp(nat->type, "snat")) { +- if (sset_contains(&snat_ips, ext_addr)) { ++ if (!sset_add(&snat_ips, ext_addr)) { + continue; + } +- sset_add(&snat_ips, ext_addr); + } + + /* Priority 91 and 92 flows are added for each gateway router +@@ -9063,6 +9062,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + continue; + } + ++ struct sset sset_snat_ips = SSET_INITIALIZER(&sset_snat_ips); + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; + const struct nbrec_nat *nat = nat_entry->nb; +@@ -9072,8 +9072,14 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + continue; + } + ++ struct lport_addresses *ext_addrs = &nat_entry->ext_addrs; ++ char *ext_addr = (nat_entry_is_v6(nat_entry) ++ ? ext_addrs->ipv6_addrs[0].addr_s ++ : ext_addrs->ipv4_addrs[0].addr_s); + if (!strcmp(nat->type, "snat")) { +- continue; ++ if (!sset_add(&sset_snat_ips, ext_addr)) { ++ continue; ++ } + } + + /* Mac address to use when replying to ARP/NS. */ +@@ -9115,7 +9121,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + /* Respond to ARP/NS requests on the chassis that binds the gw + * port. Drop the ARP/NS requests on other chassis. 
+ */ +- struct lport_addresses *ext_addrs = &nat_entry->ext_addrs; + if (nat_entry_is_v6(nat_entry)) { + build_lrouter_nd_flow(op->od, op, "nd_na", + ext_addrs->ipv6_addrs[0].addr_s, +@@ -9138,6 +9143,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + &nat->header_, lflows); + } + } ++ sset_destroy(&sset_snat_ips); + } + + /* DHCPv6 reply handling */ +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index 86eee3542..70ae8b92b 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -1754,6 +1754,8 @@ action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:100 + # Priority 91 drop flows (per distributed gw port), if port is not resident. + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=91" | grep "arp\|nd" | sort], [0], [dnl + table=3 (lr_in_ip_input ), priority=91 , dnl ++match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150), action=(drop;) ++ table=3 (lr_in_ip_input ), priority=91 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2), dnl + action=(drop;) + table=3 (lr_in_ip_input ), priority=91 , dnl +@@ -1767,6 +1769,9 @@ action=(drop;) + # Priority 92 ARP/NS responders (per distributed gw port), if port is resident. 
+ AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=92" | grep "arp\|nd" | sort], [0], [dnl + table=3 (lr_in_ip_input ), priority=92 , dnl ++match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150 && is_chassis_resident("cr-lrp-public")), dnl ++action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;) ++ table=3 (lr_in_ip_input ), priority=92 , dnl + match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2 && is_chassis_resident("cr-lrp-public")), dnl + action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;) + table=3 (lr_in_ip_input ), priority=92 , dnl +diff --git a/tests/ovn.at b/tests/ovn.at +index 2bef87b40..7cc1756e1 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -21439,3 +21439,263 @@ OVS_WAIT_UNTIL([test x$(as hv1 ovn-appctl -t ovn-controller debug/status) = "xru + + OVN_CLEANUP([hv1]) + AT_CLEANUP ++ ++AT_SETUP([ovn -- ARP replies for SNAT external ips]) ++ovn_start ++ ++net_add n1 ++ ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ovs-vsctl -- add-port br-int hv1-vif1 -- \ ++ set interface hv1-vif1 external-ids:iface-id=sw0-port1 \ ++ options:tx_pcap=hv1/vif1-tx.pcap \ ++ options:rxq_pcap=hv1/vif1-rx.pcap \ ++ ofport-request=1 ++ovs-vsctl -- add-port br-int hv1-vif2 -- \ ++ set interface hv1-vif2 external-ids:iface-id=sw3-port1 \ ++ options:tx_pcap=hv1/vif2-tx.pcap \ ++ options:rxq_pcap=hv1/vif2-rx.pcap \ ++ ofport-request=2 ++AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys]) ++ovs-vsctl set open . 
external_ids:ovn-enable-lflow-cache=false ++ ++sim_add hv2 ++as hv2 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.2 ++ovs-vsctl -- add-port br-int hv2-vif1 -- \ ++ set interface hv2-vif1 external-ids:iface-id=sw0-port2 \ ++ options:tx_pcap=hv2/vif1-tx.pcap \ ++ options:rxq_pcap=hv2/vif1-rx.pcap \ ++ ofport-request=1 ++AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys]) ++ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false ++ ++sim_add hv3 ++as hv3 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.3 ++ovs-vsctl -- add-port br-int hv3-vif1 -- \ ++ set interface hv3-vif1 external-ids:iface-id=sw1-port1 \ ++ options:tx_pcap=hv3/vif1-tx.pcap \ ++ options:rxq_pcap=hv3/vif1-rx.pcap \ ++ ofport-request=1 ++AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys]) ++ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false ++ ++ovn-nbctl ls-add sw0 ++ovn-nbctl lsp-add sw0 sw0-port1 ++ovn-nbctl lsp-set-addresses sw0-port1 "10:54:00:00:00:03 10.0.0.3 1000::3" ++ovn-nbctl lsp-add sw0 sw0-port2 ++ovn-nbctl lsp-set-addresses sw0-port2 "10:54:00:00:00:04 10.0.0.4 1000::4" ++ ++ovn-nbctl ls-add sw1 ++ovn-nbctl lsp-add sw1 sw1-port1 ++ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3" ++ ++ovn-nbctl lr-add lr0 ++ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64 ++ovn-nbctl lsp-add sw0 sw0-lr0 ++ovn-nbctl lsp-set-type sw0-lr0 router ++ovn-nbctl lsp-set-addresses sw0-lr0 router ++ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 ++ ++ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64 ++ovn-nbctl lsp-add sw1 sw1-lr0 ++ovn-nbctl lsp-set-type sw1-lr0 router ++ovn-nbctl lsp-set-addresses sw1-lr0 router ++ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 ++ ++ovn-nbctl ls-add public ++ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.16.0.100/24 3000::a/64 ++ovn-nbctl lsp-add public public-lr0 
++ovn-nbctl lsp-set-type public-lr0 router ++ovn-nbctl lsp-set-addresses public-lr0 router ++ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public ++ ++# localnet port ++ovn-nbctl lsp-add public ln-public ++ovn-nbctl lsp-set-type ln-public localnet ++ovn-nbctl lsp-set-addresses ln-public unknown ++ovn-nbctl lsp-set-options ln-public network_name=physnet1 ++ ++# schedule the gw router port to a chassis. ++ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20 ++ ++# Create NAT entries for the ports ++ ++# sw0-port1 ++ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.0.110 10.0.0.3 sw0-port1 30:54:00:00:00:03 ++ovn-nbctl lr-nat-add lr0 dnat_and_snat 3000::c 1000::3 sw0-port1 40:54:00:00:00:03 ++# sw1-port1 ++ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.0.120 20.0.0.3 sw1-port1 30:54:00:00:00:04 ++ovn-nbctl lr-nat-add lr0 dnat_and_snat 3000::d 2000::3 sw1-port1 40:54:00:00:00:04 ++ ++# Add snat entriess ++ovn-nbctl lr-nat-add lr0 snat 172.16.0.100 10.0.0.0/24 ++ovn-nbctl lr-nat-add lr0 snat 172.16.0.101 10.0.0.10 ++ovn-nbctl lr-nat-add lr0 snat 172.16.0.102 10.0.0.11 ++ovn-nbctl lr-nat-add lr0 snat 172.16.0.100 20.0.0.0/24 ++ ++ovn-nbctl ls-add sw3 ++ovn-nbctl lsp-add sw3 sw3-port1 ++ovn-nbctl lsp-set-addresses sw3-port1 "20:14:00:00:00:03 30.0.0.3 3000::3" ++ ++ovn-nbctl lr-add lr1 ++ovn-nbctl lrp-add lr1 lr1-sw3 00:00:00:10:ff:03 30.0.0.1/24 3000::a/64 ++ovn-nbctl lsp-add sw3 sw3-lr1 ++ovn-nbctl lsp-set-type sw3-lr1 router ++ovn-nbctl lsp-set-addresses sw3-lr1 router ++ovn-nbctl lsp-set-options sw3-lr1 router-port=lr1-sw3 ++ ++ovn-nbctl ls-add join ++ ++# Connect lr1 to join ++ovn-nbctl lrp-add lr1 lr1-join 00:00:04:01:02:03 170.0.0.1/24 ++ovn-nbctl lsp-add join join-lr1 ++ovn-nbctl lsp-set-type join-lr1 router ++ovn-nbctl lsp-set-addresses join-lr1 router ++ovn-nbctl lsp-set-options join-lr1 router-port=lr1-join ++ ++# Create GW router ++ovn-nbctl lr-add gw_router ++# connect gw_router to join ++ovn-nbctl lrp-add gw_router gw_router-join 00:00:03:11:12:13 
170.0.0.2/24 ++ovn-nbctl lsp-add join join-gw_router ++ovn-nbctl lsp-set-type join-gw_router router ++ovn-nbctl lsp-set-addresses join-gw_router router ++ovn-nbctl lsp-set-options join-gw_router router-port=gw_router-join ++ ++# Connect gw_router to public ++ovn-nbctl lrp-add gw_router gw_router-public 00:00:30:30:32:33 172.16.0.200/24 3000::b/64 ++ovn-nbctl lsp-add public public-gw_router ++ovn-nbctl lsp-set-type public-gw_router router ++ovn-nbctl lsp-set-addresses public-gw_router router ++ovn-nbctl lsp-set-options public-gw_router router-port=gw_router-public ++ ++# Pin gw_router to hv3 ++ovn-nbctl set logical_router gw_router options:chassis=hv3 ++ ++# Add snat entries on gw_router ++ovn-nbctl lr-nat-add gw_router snat 172.16.0.200 30.0.0.0/24 ++ovn-nbctl lr-nat-add gw_router snat 172.16.0.201 30.0.0.3 ++ ++ovn-nbctl --wait=hv sync ++ ++# Create an interface in br-phys in hv2 and send ARP request for 172.16.0.100 ++as hv2 ++ovs-vsctl -- add-port br-phys hv2-phys1 -- \ ++ set interface hv2-phys1 options:tx_pcap=hv2/phys1-tx.pcap \ ++ options:rxq_pcap=hv2/phys1-rx.pcap \ ++ ofport-request=1 ++ ++reset_pcap_file() { ++ local iface=$1 ++ local pcap_file=$2 ++ ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \ ++options:rxq_pcap=dummy-rx.pcap ++ rm -f ${pcap_file}*.pcap ++ ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \ ++options:rxq_pcap=${pcap_file}-rx.pcap ++} ++ ++ip_to_hex() { ++ printf "%02x%02x%02x%02x" "$@" ++} ++ ++send_arp_request() { ++ local eth_src=$1 spa=$2 tpa=$3 ++ local eth_dst=ffffffffffff ++ local eth_type=0806 ++ local eth=${eth_dst}${eth_src}${eth_type} ++ ++ local arp=0001080006040001${eth_src}${spa}${eth_dst}${tpa} ++ ++ local request=${eth}${arp} ++ as hv2 ovs-appctl netdev-dummy/receive hv2-phys1 $request ++} ++ ++test_arp_response () { ++ local router_mac=$1 router_ip=$2 gw=$3 nongw1=$4 nongw2=$5 ++ ++ echo "Checking arp reply for IP - $router_ip" ++ as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 
++ as hv2 reset_pcap_file br-phys_n1 hv2/br-phys_n1 ++ as hv3 reset_pcap_file br-phys_n1 hv3/br-phys_n1 ++ as hv2 reset_pcap_file hv2-phys1 hv2/phys1 ++ ++ local src_mac=000200100011 ++ src_ip=$(ip_to_hex 172 16 0 40) ++ send_arp_request ${src_mac} ${src_ip} ${router_ip} ++ arp_reply=${src_mac}${router_mac}08060001080006040002${router_mac} ++ arp_reply=${arp_reply}${router_ip}${src_mac}${src_ip} ++ ++ OVS_WAIT_UNTIL([ ++ test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/phys1-tx.pcap | wc -l) -ge 1 ++ ]) ++ ++ AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/phys1-tx.pcap | \ ++ grep -c $arp_reply], [0], [1 ++]) ++ ++ # $gw phys1-n1 should see the response because $gw ovn-controller responds ++ # to arp request. ++ AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $gw/br-phys_n1-tx.pcap | \ ++ grep -c $arp_reply], [0], [1 ++]) ++ ++ # $nongw1 and $nongw1 phys1-n1 should not see the response. ++ $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw1/br-phys_n1-tx.pcap ++ AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw1/br-phys_n1-tx.pcap | \ ++ grep -c $arp_reply], [1], [0 ++]) ++ ++ AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw2/br-phys_n1-tx.pcap | \ ++ grep -c $arp_reply], [1], [0 ++]) ++} ++ ++# Send ARP request for the IPs which belongs to lr0 having ++# distributed gw router port - lr0-public. 
++test_arp_response 000020201213 $(ip_to_hex 172 16 0 100) hv1 hv2 hv3 ++test_arp_response 000020201213 $(ip_to_hex 172 16 0 101) hv1 hv2 hv3 ++test_arp_response 000020201213 $(ip_to_hex 172 16 0 102) hv1 hv2 hv3 ++ ++# Send ARP request for the IP which belongs to gw_router ++test_arp_response 000030303233 $(ip_to_hex 172 16 0 200) hv3 hv1 hv2 ++test_arp_response 000030303233 $(ip_to_hex 172 16 0 201) hv3 hv1 hv2 ++ ++# Make hv3 claim the cr-lr0-public ++ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20 ++ovn-nbctl lrp-set-gateway-chassis lr0-public hv2 30 ++ovn-nbctl lrp-set-gateway-chassis lr0-public hv3 40 ++ ++hv3_uuid=$(ovn-sbctl --bare --columns _uuid list chassis hv3) ++ ++OVS_WAIT_UNTIL([ ++ cr_lr0_public_ch=$(ovn-sbctl --bare --columns chassis list port_binding cr-lr0-public) ++ test $cr_lr0_public_ch = $hv3_uuid ++]) ++ ++test_arp_response 000020201213 $(ip_to_hex 172 16 0 100) hv3 hv1 hv2 ++test_arp_response 000020201213 $(ip_to_hex 172 16 0 101) hv3 hv1 hv2 ++test_arp_response 000020201213 $(ip_to_hex 172 16 0 102) hv3 hv1 hv2 ++ ++# Schedule gw_router on hv1. 
++ovn-nbctl set logical_router gw_router options:chassis=hv1 ++hv1_uuid=$(ovn-sbctl --bare --columns _uuid list chassis hv1) ++ ++OVS_WAIT_UNTIL([ ++ gw_router_ch=$(ovn-sbctl --bare --columns chassis list port_binding gw_router-public) ++ test $gw_router_ch = $hv1_uuid ++]) ++ ++# Send ARP request for the IP which belongs to gw_router ++test_arp_response 000030303233 $(ip_to_hex 172 16 0 200) hv1 hv2 hv3 ++test_arp_response 000030303233 $(ip_to_hex 172 16 0 201) hv1 hv2 hv3 ++ ++OVN_CLEANUP([hv1],[hv2],[hv3]) ++AT_CLEANUP +-- +2.26.2 + diff --git a/SOURCES/0001-ovn-northd-Fix-the-missing-lflow-issue-in-LS_OUT_PRE.patch b/SOURCES/0001-ovn-northd-Fix-the-missing-lflow-issue-in-LS_OUT_PRE.patch deleted file mode 100644 index 3d0d04c..0000000 --- a/SOURCES/0001-ovn-northd-Fix-the-missing-lflow-issue-in-LS_OUT_PRE.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 7aa75981dfc17eb7f0ac9ee7300e346f3b6a0c8e Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Tue, 7 Jul 2020 18:30:20 +0530 -Subject: [PATCH 1/5] ovn-northd: Fix the missing lflow issue in LS_OUT_PRE_LB. - -When load balancer(s) configured with VIPs are associated to a logical switch, -then ovn-northd adds the below logical flow so that the packets in the egress -switch pipeline enter the conntrack. - -table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) - -ovn-northd maintains a local boolean variable 'vip_configured' in -build_pre_lb() and adds the above lflow if this is true at the end. -But this variable is overriden as -> vip_configured = !!lb->n_vips; -when it loops through every load balancer associated with the logical switch. - -This is wrong and this patch fixes this issue. - -A test case is addd to test this scenario and the test case fails without the -fix in this patch. 
- -Fixes: bb9f2b9ce56c("ovn-northd: Consider load balancer active backends in router pipeline") - -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1849162 -Reported-by: Tim Rozet -Acked-by: Dumitru Ceara -Signed-off-by: Numan Siddique - -(cherry-picked from master commit 59af6f9048946e16813ad7ad4e453b85989670e4) ---- - northd/ovn-northd.c | 2 +- - tests/ovn-northd.at | 73 +++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 74 insertions(+), 1 deletion(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 8a809d020..2b891c68f 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -4932,7 +4932,7 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, - * table, we will eventually look at L4 information. */ - } - -- vip_configured = !!lb->n_vips; -+ vip_configured = (vip_configured || lb->n_vips); - } - - /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send -diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index 37805d3d8..842800b90 100644 ---- a/tests/ovn-northd.at -+++ b/tests/ovn-northd.at -@@ -1485,3 +1485,76 @@ lsp2 - ]) - - AT_CLEANUP -+ -+# This test case tests that when a logical switch has load balancers associated -+# (with VIPs configured), the below logical flow is added by ovn-northd. -+# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) -+# This test case is added for the BZ - -+# https://bugzilla.redhat.com/show_bug.cgi?id=1849162 -+# -+# ovn-northd was not adding the above lflow if the last load balancer associated -+# to the logical switch doesn't have the VIP configured even if other load -+# balancers before the last one in the last have VIPs configured. -+# So make sure that the above lflow is added even if one load balancer has VIP -+# associated. 
-+ -+AT_SETUP([ovn -- Load balancer - missing ls_out_pre_lb flows]) -+ovn_start -+ -+ovn-nbctl ls-add sw0 -+ovn-nbctl lsp-add sw0 sw0-p1 -+ -+ovn-nbctl lb-add lb1 "10.0.0.10" "10.0.0.3" -+ovn-nbctl lb-add lb2 "10.0.0.11" "10.0.0.4" -+ -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+]) -+ -+ovn-nbctl ls-lb-add sw0 lb1 -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) -+]) -+ -+ovn-nbctl ls-lb-add sw0 lb2 -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) -+]) -+ -+lb1_uuid=$(ovn-nbctl --bare --columns _uuid find load_balancer name=lb1) -+lb2_uuid=$(ovn-nbctl --bare --columns _uuid find load_balancer name=lb2) -+ -+ovn-nbctl clear load_balancer $lb1_uuid vips -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) -+]) -+ -+ovn-nbctl clear load_balancer $lb2_uuid vips -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+]) -+ -+ovn-nbctl set load_balancer $lb1_uuid vips:"10.0.0.10"="10.0.0.3" -+ovn-nbctl set load_balancer $lb2_uuid vips:"10.0.0.11"="10.0.0.4" -+ -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) -+]) -+ -+# Now reverse the order of clearing the vip. 
-+ovn-nbctl clear load_balancer $lb2_uuid vips -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+ table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) -+]) -+ -+ovn-nbctl clear load_balancer $lb1_uuid vips -+ovn-nbctl --wait=sb sync -+AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl -+]) -+ -+AT_CLEANUP --- -2.26.2 - diff --git a/SOURCES/0001-ovn-northd-Forward-ARP-requests-on-localnet-ports.patch b/SOURCES/0001-ovn-northd-Forward-ARP-requests-on-localnet-ports.patch deleted file mode 100644 index 742c212..0000000 --- a/SOURCES/0001-ovn-northd-Forward-ARP-requests-on-localnet-ports.patch +++ /dev/null @@ -1,321 +0,0 @@ -From 20bf8b836703f3ad984e6679510ea215655479a5 Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Tue, 24 Mar 2020 11:03:29 +0100 -Subject: [PATCH ovn] ovn-northd: Forward ARP requests on localnet ports. - -Commit 32f5ebb06226 limited the ARP/ND broadcast domain but in scenarios -where ARP responder flows are installed only on chassis that own the -associated logical ports ARP requests should still be forwarded on -localnet ports because the router pipeline should be executed on the -chassis that owns the logical port. Only that chassis will reply to the -ARP/ND request. 
- -Reported-by: Michael Plato -Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-March/049856.html -Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain whenever possible.") -Signed-off-by: Dumitru Ceara -Signed-off-by: Numan Siddique -(cherry picked from upstream commit 9f13732ae232aa0c872527d948435125b3a6cbce) - -Change-Id: I86b18043821f24bf491c9b85381c5d008f7f2307 ---- - northd/ovn-northd.8.xml | 3 +- - northd/ovn-northd.c | 6 +- - tests/ovn.at | 169 ++++++++++++++++++++++++++++++++++++++++++++---- - 3 files changed, 162 insertions(+), 16 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index 7d03cbc..1e0993e 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -1194,7 +1194,8 @@ output; - Priority-75 flows for each IP address/VIP/NAT address owned by a - router port connected to the switch. These flows match ARP requests - and ND packets for the specific IP addresses. Matched packets are -- forwarded only to the router that owns the IP address. -+ forwarded only to the router that owns the IP address and, if -+ present, to the localnet port of the logical switch. - - -
  • -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 356c543..787ca2f 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -5899,8 +5899,12 @@ build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips, - ds_put_cstr(&match, "}"); - - /* Send a the packet only to the router pipeline and skip flooding it -- * in the broadcast domain. -+ * in the broadcast domain (except for the localnet port). - */ -+ if (od->localnet_port) { -+ ds_put_format(&actions, "clone { outport = %s; output; }; ", -+ od->localnet_port->json_key); -+ } - ds_put_format(&actions, "outport = %s; output;", patch_op->json_key); - ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP, priority, - ds_cstr(&match), ds_cstr(&actions), stage_hint); -diff --git a/tests/ovn.at b/tests/ovn.at -index 1b6073f..4baf2e9 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -17928,13 +17928,14 @@ net_add n1 - sim_add hv1 - as hv1 - ovs-vsctl add-br br-phys -+ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys - ovn_attach n1 br-phys 192.168.0.1 - --ovs-vsctl -- add-port br-int hv1-vif1 -- \ -- set interface hv1-vif1 external-ids:iface-id=sw-agg-ext \ -- options:tx_pcap=hv1/vif1-tx.pcap \ -- options:rxq_pcap=hv1/vif1-rx.pcap \ -- ofport-request=1 -+sim_add hv2 -+as hv2 -+ovs-vsctl add-br br-phys -+ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys -+ovn_attach n1 br-phys 192.168.0.2 - - # One Aggregation Switch connected to two Logical networks (routers). - ovn-nbctl ls-add sw-agg -@@ -17950,18 +17951,66 @@ ovn-nbctl lsp-add sw-agg sw-rtr2 \ - -- lsp-set-addresses sw-rtr2 00:00:00:00:02:00 \ - -- lsp-set-options sw-rtr2 router-port=rtr2-sw - --# Configure L3 interface IPv4 & IPv6 on both routers -+# Localnet port on the Aggregation Switch. 
-+ovn-nbctl lsp-add sw-agg sw-agg-ln -+ovn-nbctl lsp-set-addresses sw-agg-ln unknown -+ovn-nbctl lsp-set-type sw-agg-ln localnet -+ovn-nbctl lsp-set-options sw-agg-ln network_name=phys -+ -+# Configure L3 interface IPv4 & IPv6 on both routers. - ovn-nbctl lr-add rtr1 - ovn-nbctl lrp-add rtr1 rtr1-sw 00:00:00:00:01:00 10.0.0.1/24 10::1/64 - -+ovn-nbctl lrp-add rtr1 rtr1-sw1 00:00:01:00:00:00 20.0.0.1/24 20::1/64 -+ - ovn-nbctl lr-add rtr2 - ovn-nbctl lrp-add rtr2 rtr2-sw 00:00:00:00:02:00 10.0.0.2/24 10::2/64 - -+# Configure router gateway ports. -+ovn-nbctl lrp-set-gateway-chassis rtr1-sw hv1 20 -+ovn-nbctl lrp-set-gateway-chassis rtr2-sw hv1 20 -+ -+# One private network behind rtr1 with two VMs. -+ovn-nbctl ls-add sw1 -+ovn-nbctl lsp-add sw1 sw1-p1 \ -+ -- lsp-set-addresses sw1-p1 00:00:00:01:00:00 -+ovn-nbctl lsp-add sw1 sw1-p2 \ -+ -- lsp-set-addresses sw1-p2 00:00:00:02:00:00 -+ovn-nbctl lsp-add sw1 sw1-rtr1 \ -+ -- lsp-set-type sw1-rtr1 router \ -+ -- lsp-set-addresses sw1-rtr1 00:00:01:00:00:00 \ -+ -- lsp-set-options sw1-rtr1 router-port=rtr1-sw1 -+ -+# Bind a "VM" connected to sw-agg on hv1. -+as hv1 -+ovs-vsctl -- add-port br-int hv1-vif0 -- \ -+ set interface hv1-vif0 external-ids:iface-id=sw-agg-ext \ -+ options:tx_pcap=hv1/vif0-tx.pcap \ -+ options:rxq_pcap=hv1/vif0-rx.pcap \ -+ ofport-request=1 -+ -+# Bind a "VM" connected to sw1 on hv1. -+as hv1 -+ovs-vsctl -- add-port br-int hv1-vif1 -- \ -+ set interface hv1-vif1 external-ids:iface-id=sw1-p1 \ -+ options:tx_pcap=hv1/vif1-tx.pcap \ -+ options:rxq_pcap=hv1/vif1-rx.pcap \ -+ ofport-request=2 -+ -+# Bind a "VM" connected to sw1 on hv2. 
-+as hv2 -+ovs-vsctl -- add-port br-int hv1-vif2 -- \ -+ set interface hv1-vif2 external-ids:iface-id=sw1-p2 \ -+ options:tx_pcap=hv1/vif2-tx.pcap \ -+ options:rxq_pcap=hv1/vif2-rx.pcap \ -+ ofport-request=3 -+ - OVN_POPULATE_ARP - ovn-nbctl --wait=hv sync - - sw_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding sw-agg) - sw_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw-agg) -+r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding rtr1) - - r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr1) - r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr2) -@@ -17970,9 +18019,10 @@ mc_key=$(ovn-sbctl --bare --columns tunnel_key find multicast_group datapath=${s - mc_key=$(printf "%04x" $mc_key) - - match_sw_metadata="metadata=0x${sw_dp_key}" -+match_r1_metadata="metadata=0x${r1_dp_key}" - - # Inject ARP request for first router owned IP address. --send_arp_request 1 1 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 1) -+send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 1) - - # Verify that the ARP request is sent only to rtr1. - match_arp_req="priority=75.*${match_sw_metadata}.*arp_tpa=10.0.0.1,arp_op=1" -@@ -18001,7 +18051,7 @@ OVS_WAIT_UNTIL([ - # Inject ND_NS for ofirst router owned IP address. - src_ipv6=00100000000000000000000000000254 - dst_ipv6=00100000000000000000000000000001 --send_nd_ns 1 1 ${src_mac} ${src_ipv6} ${dst_ipv6} 751d -+send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 751d - - # Verify that the ND_NS is sent only to rtr1. - match_nd_ns="priority=75.*${match_sw_metadata}.*icmp_type=135.*nd_target=10::1" -@@ -18038,7 +18088,7 @@ ovn-nbctl lr-lb-add rtr2 lb2-v6 - ovn-nbctl --wait=hv sync - - # Inject ARP request for first router owned VIP address. 
--send_arp_request 1 1 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 11) -+send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 11) - - # Verify that the ARP request is sent only to rtr1. - match_arp_req="priority=75.*${match_sw_metadata}.*arp_tpa=10.0.0.11,arp_op=1" -@@ -18067,7 +18117,7 @@ OVS_WAIT_UNTIL([ - # Inject ND_NS for first router owned VIP address. - src_ipv6=00100000000000000000000000000254 - dst_ipv6=00100000000000000000000000000011 --send_nd_ns 1 1 ${src_mac} ${src_ipv6} ${dst_ipv6} 751d -+send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 751d - - # Verify that the ND_NS is sent only to rtr1. - match_nd_ns="priority=75.*${match_sw_metadata}.*icmp_type=135.*nd_target=10::11" -@@ -18091,14 +18141,21 @@ OVS_WAIT_UNTIL([ - test "0" = "${pkts_flooded}" - ]) - --# Configure NAT on both routers -+# Configure NAT on both routers. - ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.111 42.42.42.1 - ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::111 42::1 - ovn-nbctl lr-nat-add rtr2 dnat_and_snat 10.0.0.222 42.42.42.2 - ovn-nbctl lr-nat-add rtr2 dnat_and_snat 10::222 42::2 - -+# Configure FIP1 and FIP2 on rtr1 for sw1-p1 and sw1-p2. -+ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.121 20.0.0.11 sw1-p1 00:00:00:01:00:00 -+ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::121 20::11 sw1-p1 00:00:00:01:00:00 -+ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.122 20.0.0.12 sw1-p2 00:00:00:02:00:00 -+ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::122 20::12 sw1-p2 00:00:00:02:00:00 -+ovn-nbctl --wait=hv sync -+ - # Inject ARP request for first router owned NAT address. --send_arp_request 1 1 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 111) -+send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 111) - - # Verify that the ARP request is sent only to rtr1. 
- match_arp_req="priority=75.*${match_sw_metadata}.*arp_tpa=10.0.0.111,arp_op=1" -@@ -18124,10 +18181,50 @@ OVS_WAIT_UNTIL([ - test "0" = "${pkts_flooded}" - ]) - -+# Inject ARP request for FIP1. -+send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 121) -+ -+# Verify that the ARP request is replied to from hv1 and not hv2. -+match_arp_req="priority=90.*${match_r1_metadata}.*arp_tpa=10.0.0.121,arp_op=1" -+ -+as hv1 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_arp_req}" | grep n_packets=1 -c) -+ test "1" = "${pkts_on_rtr1}" -+]) -+ -+as hv2 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_arp_req}" | grep n_packets=1 -c) -+ test "0" = "${pkts_on_rtr1}" -+]) -+ -+# Inject ARP request for FIP2. -+send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 122) -+ -+# Verify that the ARP request is replied to from hv2 and not hv1. -+match_arp_req="priority=90.*${match_r1_metadata}.*arp_tpa=10.0.0.122,arp_op=1" -+ -+as hv2 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_arp_req}" | grep n_packets=1 -c) -+ test "1" = "${pkts_on_rtr1}" -+]) -+ -+as hv1 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_arp_req}" | grep n_packets=1 -c) -+ test "0" = "${pkts_on_rtr1}" -+]) -+ - # Inject ND_NS for first router owned IP address. - src_ipv6=00100000000000000000000000000254 - dst_ipv6=00100000000000000000000000000111 --send_nd_ns 1 1 ${src_mac} ${src_ipv6} ${dst_ipv6} 751d -+send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 751d - - # Verify that the ND_NS is sent only to rtr1. - match_nd_ns="priority=75.*${match_sw_metadata}.*icmp_type=135.*nd_target=10::111" -@@ -18151,7 +18248,51 @@ OVS_WAIT_UNTIL([ - test "0" = "${pkts_flooded}" - ]) - --OVN_CLEANUP([hv1]) -+# Inject ND_NS for FIP1. 
-+src_ipv6=00100000000000000000000000000254 -+dst_ipv6=00100000000000000000000000000121 -+send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 72dd -+ -+# Verify that the ND_NS is replied to from hv1 and not hv2. -+match_nd_ns="priority=90.*${match_r1_metadata}.*icmp_type=135.*nd_target=10::121" -+ -+as hv1 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_nd_ns}" | grep n_packets=1 -c) -+ test "1" = "${pkts_on_rtr1}" -+]) -+ -+as hv2 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_nd_ns}" | grep n_packets=1 -c) -+ test "0" = "${pkts_on_rtr1}" -+]) -+ -+# Inject ND_NS for FIP2. -+src_ipv6=00100000000000000000000000000254 -+dst_ipv6=00100000000000000000000000000122 -+send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 72db -+ -+# Verify that the ND_NS is replied to from hv2 and not hv1. -+match_nd_ns="priority=90.*${match_r1_metadata}.*icmp_type=135.*nd_target=10::122" -+ -+as hv2 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_nd_ns}" | grep n_packets=1 -c) -+ test "1" = "${pkts_on_rtr1}" -+]) -+ -+as hv1 -+OVS_WAIT_UNTIL([ -+ pkts_on_rtr1=$(ovs-ofctl dump-flows br-int | \ -+ grep -E "${match_nd_ns}" | grep n_packets=1 -c) -+ test "0" = "${pkts_on_rtr1}" -+]) -+ -+OVN_CLEANUP([hv1], [hv2]) - AT_CLEANUP - - AT_SETUP([ovn -- trace when flow cookie updated]) --- -1.8.3.1 - diff --git a/SOURCES/0001-ovn-northd-Limit-IPv6-ND-NS-RA-RS-to-the-local-netwo.patch b/SOURCES/0001-ovn-northd-Limit-IPv6-ND-NS-RA-RS-to-the-local-netwo.patch deleted file mode 100644 index 9ef0d78..0000000 --- a/SOURCES/0001-ovn-northd-Limit-IPv6-ND-NS-RA-RS-to-the-local-netwo.patch +++ /dev/null @@ -1,205 +0,0 @@ -From d46b2e1f3b31509849441cde28475a8d48a6624f Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Fri, 17 Apr 2020 23:54:25 +0200 -Subject: [PATCH] ovn-northd: Limit IPv6 ND NS/RA/RS to the local network. 
- -Neighbor solicitation packets for router owned IPs are replied to in -table IN_IP_INPUT at a higher priority than flows relay IPv6 multicast -traffic when needed. All other NS/NA packets received at this point can -be safely dropped. - -However, router advertisement and router solicitation packets are -processed at a later stage, in ND_RA_OPTIONS/ND_RA_RESPONSE. These -packets need to be allowed in table IN_IP_INPUT. - -Commit 677a3ba4d66b incorrectly allowed all IPv6 multicast traffic -destined to all-nodes in table IN_IP_INPUT. Instead, only ND_RA and -ND_RS packets should be allowed. All others were either already -processed or should be dropped. If multicast relay is enabled then IPv6 -multicast traffic that's not destined to reserved groups should also be -allowed. - -Furthermore, router solicitation and advertisement packets that don't -get processed in tables ND_RA_OPTIONS/ND_RA_RESPONSE should be dropped -in IN_IP_ROUTING because they should never be routed. - -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1825334 -Reported-by: Jakub Libosvar -Fixes: 677a3ba4d66b ("ovn: Add MLD support.") -Signed-off-by: Dumitru Ceara -Signed-off-by: Numan Siddique - -(cherry-picked from upstream branch-20.03 commit 0924bcb07ef25f93fde683fe8f15d376eca005ec) - -Change-Id: I1d060e5d40f344b890974e6ad0c0960ea280f050 ---- - northd/ovn-northd.8.xml | 49 +++++++++++++++++++++++++++-------------- - northd/ovn-northd.c | 43 +++++++++++++++++++++++++----------- - 2 files changed, 62 insertions(+), 30 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index 82c86f636..efcc4b7fc 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -1668,22 +1668,6 @@ next; - router. -
  • - --
  • --

    -- A priority-87 flow explicitly allows IPv6 multicast traffic that is -- supposed to reach the router pipeline (e.g., neighbor solicitations -- and traffic destined to the All-Routers multicast group). --

    --
  • -- --
  • --

    -- A priority-86 flow allows IP multicast traffic if -- :mcast_relay='true', -- otherwise drops it. --

    --
  • -- -
  • -

    - ICMP echo reply. These flows reply to ICMP echo requests received -@@ -1944,6 +1928,29 @@ nd.tll = external_mac; - packets. -

  • - -+
  • -+

    -+ A priority-84 flow explicitly allows IPv6 multicast traffic that is -+ supposed to reach the router pipeline (i.e., router solicitation -+ and router advertisement packets). -+

    -+
  • -+ -+
  • -+

    -+ A priority-83 flow explicitly drops IPv6 multicast traffic that is -+ destined to reserved multicast groups. -+

    -+
  • -+ -+
  • -+

    -+ A priority-82 flow allows IP multicast traffic if -+ :mcast_relay='true', -+ otherwise drops it. -+

    -+
  • -+ -
  • -

    - UDP port unreachable. Priority-80 flows generate ICMP port -@@ -2440,6 +2447,13 @@ output; -

    - -
      -+
    • -+

      -+ Priority-550 flow that drops IPv6 Router Solicitation/Advertisement -+ packets that were not processed in previous tables. -+

      -+
    • -+ -
    • -

      - Priority-500 flows that match IP multicast traffic destined to -@@ -2457,7 +2471,8 @@ output; - multicast group, which ovn-northd populates with the - logical ports that have - -- :mcast_flood='true'. -+ :mcast_flood='true'. If no router ports are configured -+ to flood multicast traffic the packets are dropped. -

      -
    • - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 1f1238d23..f7d3988d7 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -8002,17 +8002,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - - /* Priority-90 flows reply to ARP requests and ND packets. */ - -- /* Allow IPv6 multicast traffic that's supposed to reach the -- * router pipeline (e.g., neighbor solicitations). -- */ -- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 87, "ip6.mcast_flood", -- "next;"); -- -- /* Allow multicast if relay enabled (priority 86). */ -- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 86, -- "ip4.mcast || ip6.mcast", -- od->mcast_info.rtr.relay ? "next;" : "drop;"); -- - /* Drop ARP packets (priority 85). ARP request packets for router's own - * IPs are handled with priority-90 flows. - * Drop IPv6 ND packets (priority 85). ND NA packets for router's own -@@ -8021,6 +8010,21 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, - "arp || nd", "drop;"); - -+ /* Allow IPv6 multicast traffic that's supposed to reach the -+ * router pipeline (e.g., router solicitations). -+ */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra", -+ "next;"); -+ -+ /* Drop other reserved multicast. */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83, -+ "ip6.mcast_rsvd", "drop;"); -+ -+ /* Allow other multicast if relay enabled (priority 82). */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82, -+ "ip4.mcast || ip6.mcast", -+ od->mcast_info.rtr.relay ? "next;" : "drop;"); -+ - /* Drop Ethernet local broadcast. By definition this traffic should - * not be forwarded.*/ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, -@@ -9520,7 +9524,17 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - * advance to next table (priority 500). 
- */ - HMAP_FOR_EACH (od, key_node, datapaths) { -- if (!od->nbr || !od->mcast_info.rtr.relay) { -+ if (!od->nbr) { -+ continue; -+ } -+ -+ /* Drop IPv6 multicast traffic that shouldn't be forwarded, -+ * i.e., router solicitation and router advertisement. -+ */ -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550, -+ "nd_rs || nd_ra", "drop;"); -+ -+ if (!od->mcast_info.rtr.relay) { - continue; - } - -@@ -9551,7 +9565,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - } - - /* If needed, flood unregistered multicast on statically configured -- * ports. -+ * ports. Otherwise drop any multicast traffic. - */ - if (od->mcast_info.rtr.flood_static) { - ds_clear(&actions); -@@ -9562,6 +9576,9 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - "ip.ttl--; " - "next; " - "};"); -+ } else { -+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450, -+ "ip4.mcast || ip6.mcast", "drop;"); - } - } - --- -2.25.1 - diff --git a/SOURCES/0001-ovn-northd-Optimize-flows-for-LB-Hairpin-traffic.patch b/SOURCES/0001-ovn-northd-Optimize-flows-for-LB-Hairpin-traffic.patch deleted file mode 100644 index 6299b4e..0000000 --- a/SOURCES/0001-ovn-northd-Optimize-flows-for-LB-Hairpin-traffic.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 74e1bf4dd0f6c62602ab708eabb5534a274a4d75 Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Tue, 28 Apr 2020 12:39:26 +0200 -Subject: [PATCH] ovn-northd: Optimize flows for LB Hairpin traffic. - -In order to detect that traffic was hairpinned due to logical switch load -balancers we need to match on source and destination IPs of packets (and -protocol ports potentially) in table ls_in_pre_hairpin. - -For this, until now, we created 2 logical flows for each backend of a load -balancer VIP. However, in scenarios where large load balancers (i.e., -with large numbers of backends) are applied to multiple logical -switches, this might generate logical flow count explosion. 
- -One optimization is to generate a single logical flow per VIP that -combines all conditions generated for each backend. This reduces load on -the SB DB because of lower number of logical flows and also reduces -overall DB size because of less overhead due to other fields on the -logical_flow records. - -Comparison of various performance aspects when running OVN with the NB -database attached to the bug report on a deployment with all VIFs bound -to a single node (62 load balancer VIPs with 513 load balancer -backends, applied on 106 logical switches): - -Without this patch: -- SB database size: 60MB -- # of pre-hairpin logical flows: 109074 -- # of logical flows: 159414 -- ovn-controller max loop iteration time when processing SB DB: 8803ms -- ovn-northd max loop iteration time: 3988ms - -With this patch: -- SB database size: 29MB (~50% decrease) -- # of pre-hairpin logical flows: 13250 (~88% decrease) -- # of logical flows: 63590 (~60% decrease) -- ovn-controller max loop iteration time when processing SB DB: 5585ms -- ovn-northd max loop iteration time: 1594ms - -Reported-by: Aniket Bhat -Reported-at: https://bugzilla.redhat.com/1827403 -Fixes: 1be8ac65bc60 ("ovn-northd: Support hairpinning for logical switch load balancing.") -Signed-off-by: Dumitru Ceara -Signed-off-by: Numan Siddique -(cherry picked from upstream commit 97e82ae5f135a088c9e95b49122d8217718d23f4) - -Change-Id: Id713209f8bd159e8ad924e91681bab784606faff ---- - northd/ovn-northd.8.xml | 4 +-- - northd/ovn-northd.c | 79 ++++++++++++++++++++++++++++++++----------------- - 2 files changed, 54 insertions(+), 29 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index d39e259..1f81742 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -559,14 +559,14 @@ -

      Ingress Table 11: Pre-Hairpin

      -
        -
      • -- For all configured load balancer backends a priority-2 flow that -+ For all configured load balancer VIPs a priority-2 flow that - matches on traffic that needs to be hairpinned, i.e., after load - balancing the destination IP matches the source IP, which sets - reg0[6] = 1 and executes ct_snat(VIP) - to force replies to these packets to come back through OVN. -
      • -
      • -- For all configured load balancer backends a priority-1 flow that -+ For all configured load balancer VIPs a priority-1 flow that - matches on replies to hairpinned traffic, i.e., destination IP is VIP, - source IP is the backend IP and source L4 port is backend port, which - sets reg0[6] = 1 and executes ct_snat;. -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 0082e2e..5ada3ae 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -5542,52 +5542,77 @@ build_lb_hairpin_rules(struct ovn_datapath *od, struct hmap *lflows, - struct ovn_lb *lb, struct lb_vip *lb_vip, - const char *ip_match, const char *proto) - { -+ if (lb_vip->n_backends == 0) { -+ return; -+ } -+ -+ struct ds action = DS_EMPTY_INITIALIZER; -+ struct ds match_initiator = DS_EMPTY_INITIALIZER; -+ struct ds match_reply = DS_EMPTY_INITIALIZER; -+ struct ds proto_match = DS_EMPTY_INITIALIZER; -+ - /* Ingress Pre-Hairpin table. -- * - Priority 2: SNAT load balanced traffic that needs to be hairpinned. -+ * - Priority 2: SNAT load balanced traffic that needs to be hairpinned: -+ * - Both SRC and DST IP match backend->ip and destination port -+ * matches backend->port. - * - Priority 1: unSNAT replies to hairpinned load balanced traffic. -+ * - SRC IP matches backend->ip, DST IP matches LB VIP and source port -+ * matches backend->port. - */ -+ ds_put_char(&match_reply, '('); - for (size_t i = 0; i < lb_vip->n_backends; i++) { - struct lb_vip_backend *backend = &lb_vip->backends[i]; -- struct ds action = DS_EMPTY_INITIALIZER; -- struct ds match = DS_EMPTY_INITIALIZER; -- struct ds proto_match = DS_EMPTY_INITIALIZER; - - /* Packets that after load balancing have equal source and -- * destination IPs should be hairpinned. SNAT them so that the reply -- * traffic is directed also through OVN. -+ * destination IPs should be hairpinned. 
- */ - if (lb_vip->vip_port) { -- ds_put_format(&proto_match, " && %s && %s.dst == %"PRIu16, -- proto, proto, backend->port); -+ ds_put_format(&proto_match, " && %s.dst == %"PRIu16, -+ proto, backend->port); - } -- ds_put_format(&match, "%s.src == %s && %s.dst == %s%s", -+ ds_put_format(&match_initiator, "(%s.src == %s && %s.dst == %s%s)", - ip_match, backend->ip, ip_match, backend->ip, - ds_cstr(&proto_match)); -- ds_put_format(&action, REGBIT_HAIRPIN " = 1; ct_snat(%s);", -- lb_vip->vip); -- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 2, -- ds_cstr(&match), ds_cstr(&action), -- &lb->nlb->header_); - -- /* If the packets are replies for hairpinned traffic, UNSNAT them. */ -+ /* Replies to hairpinned traffic are originated by backend->ip:port. */ - ds_clear(&proto_match); -- ds_clear(&match); - if (lb_vip->vip_port) { -- ds_put_format(&proto_match, " && %s && %s.src == %"PRIu16, -- proto, proto, backend->port); -+ ds_put_format(&proto_match, " && %s.src == %"PRIu16, proto, -+ backend->port); - } -- ds_put_format(&match, "%s.src == %s && %s.dst == %s%s", -- ip_match, backend->ip, ip_match, lb_vip->vip, -+ ds_put_format(&match_reply, "(%s.src == %s%s)", ip_match, backend->ip, - ds_cstr(&proto_match)); -- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 1, -- ds_cstr(&match), -- REGBIT_HAIRPIN " = 1; ct_snat;", -- &lb->nlb->header_); -+ ds_clear(&proto_match); - -- ds_destroy(&action); -- ds_destroy(&match); -- ds_destroy(&proto_match); -+ if (i < lb_vip->n_backends - 1) { -+ ds_put_cstr(&match_initiator, " || "); -+ ds_put_cstr(&match_reply, " || "); -+ } - } -+ ds_put_char(&match_reply, ')'); -+ -+ /* SNAT hairpinned initiator traffic so that the reply traffic is -+ * also directed through OVN. 
-+ */ -+ ds_put_format(&action, REGBIT_HAIRPIN " = 1; ct_snat(%s);", -+ lb_vip->vip); -+ ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 2, -+ ds_cstr(&match_initiator), ds_cstr(&action), -+ &lb->nlb->header_); -+ -+ /* Replies to hairpinned traffic are destined to the LB VIP. */ -+ ds_put_format(&match_reply, " && %s.dst == %s", ip_match, lb_vip->vip); -+ -+ /* UNSNAT replies for hairpinned traffic. */ -+ ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 1, -+ ds_cstr(&match_reply), -+ REGBIT_HAIRPIN " = 1; ct_snat;", -+ &lb->nlb->header_); -+ -+ ds_destroy(&action); -+ ds_destroy(&match_initiator); -+ ds_destroy(&match_reply); -+ ds_destroy(&proto_match); - } - - static void --- -1.8.3.1 - diff --git a/SOURCES/0001-ovn-northd-Rate-limit-missing-chassis-log.patch b/SOURCES/0001-ovn-northd-Rate-limit-missing-chassis-log.patch new file mode 100644 index 0000000..5eb3010 --- /dev/null +++ b/SOURCES/0001-ovn-northd-Rate-limit-missing-chassis-log.patch @@ -0,0 +1,39 @@ +From 4fe045290891859c14e19d6bb00b2c857a0d4021 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Tue, 1 Sep 2020 21:20:04 +0200 +Subject: [PATCH] ovn-northd: Rate limit missing chassis log. + +This log does indicate a problem so it's useful to log it at WARN level +but there's not much use to spam at every iteration of ovn-northd. 
+ +CC: Han Zhou +Fixes: 4adc10f58127 ("Avoid nb_cfg update notification flooding") +Signed-off-by: Dumitru Ceara +Signed-off-by: Han Zhou +(cherry picked from upstream commit 81e88122985caf35b6c60b8d04e16cd0b53aa2b7) + +Change-Id: I946b74b44149cefec09cd1a19de3ffb8d7c01647 +--- + northd/ovn-northd.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index c83f9d5..8f2cd43 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -12254,8 +12254,10 @@ update_northbound_cfg(struct northd_context *ctx, + continue; + } + } else { +- VLOG_WARN("Chassis not exist for Chassis_Private record, " +- "name: %s", chassis_priv->name); ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); ++ VLOG_WARN_RL(&rl, "Chassis does not exist for " ++ "Chassis_Private record, name: %s", ++ chassis_priv->name); + } + + if (chassis_priv->nb_cfg < hv_cfg) { +-- +1.8.3.1 + diff --git a/SOURCES/0001-ovn-northd-Remove-useless-flow-for-GW_REDIRECT.patch b/SOURCES/0001-ovn-northd-Remove-useless-flow-for-GW_REDIRECT.patch deleted file mode 100644 index 89b38d8..0000000 --- a/SOURCES/0001-ovn-northd-Remove-useless-flow-for-GW_REDIRECT.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 32972260b50f39d493cc42a78d9648ed668c2aec Mon Sep 17 00:00:00 2001 -Message-Id: <32972260b50f39d493cc42a78d9648ed668c2aec.1590584789.git.lorenzo.bianconi@redhat.com> -From: Han Zhou -Date: Tue, 5 May 2020 23:16:09 -0700 -Subject: [PATCH ovn] ovn-northd: Remove useless flow for GW_REDIRECT. - -Remove the flow in lr_in_gw_redirect stage: - - A priority-150 logical flow with match - outport == GW && - eth.dst == 00:00:00:00:00:00 has actions - outport = CR; next;, where - GW is the logical router distributed gateway - port and CR is the chassisredirect - port representing the instance of the logical router - distributed gateway port on the - redirect-chassis. 
- -The commit c0bf32d ("Manage ARP process locally in a DVR scenario") updated -the priority-100 flow in this stage to priority 200, which makes this -priority-150 flow useless, because whatever packets matching this flow -would also match the priority-50 flow. - -Cc: Lorenzo Bianconi -Tested-by: Lorenzo Bianconi -Acked-by: Numan Siddique -Signed-off-by: Han Zhou -Signed-off-by: Lorenzo Bianconi ---- - northd/ovn-northd.8.xml | 12 ------------ - northd/ovn-northd.c | 11 ----------- - 2 files changed, 23 deletions(-) - ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -2909,18 +2909,6 @@ icmp4 { - -
          -
        • -- A priority-150 logical flow with match -- outport == GW && -- eth.dst == 00:00:00:00:00:00 has actions -- outport = CR; next;, where -- GW is the logical router distributed gateway -- port and CR is the chassisredirect -- port representing the instance of the logical router -- distributed gateway port on the -- redirect-chassis. --
        • -- --
        • - For each NAT rule in the OVN Northbound database that can - be handled in a distributed manner, a priority-200 logical - flow with match ip4.src == B && ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -10143,17 +10143,6 @@ build_lrouter_flows(struct hmap *datapat - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, - ds_cstr(&match), ds_cstr(&actions), - stage_hint); -- -- /* If the Ethernet destination has not been resolved, -- * redirect to the central instance of the l3dgw_port. -- * Such traffic will be replaced by an ARP request or ND -- * Neighbor Solicitation in the ARP request ingress -- * table, before being redirected to the central instance. -- */ -- ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00"); -- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150, -- ds_cstr(&match), ds_cstr(&actions), -- stage_hint); - } - - /* Packets are allowed by default. */ diff --git a/SOURCES/0001-ovn.at-Fix-ARP-test-that-fails-due-to-timing.patch b/SOURCES/0001-ovn.at-Fix-ARP-test-that-fails-due-to-timing.patch deleted file mode 100644 index ffc9db8..0000000 --- a/SOURCES/0001-ovn.at-Fix-ARP-test-that-fails-due-to-timing.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 356501f3246ddb99aef8cd6016467b7c1861b3ff Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Tue, 24 Mar 2020 14:35:41 +0100 -Subject: [PATCH ovn] ovn.at: Fix ARP test that fails due to timing. - -The test for "ARP/ND request broadcast limiting" checks that injected -ARP packets are not flooded using the MC_FLOOD multicast group. However, -this introduces a race condition in the test because GARPs generated by -OVN would also hit the same openflow rules. - -Remove the checks that use the MC_FLOOD group. They are also redundant -as the rest of the test checks that packets are forwarded according to -the newly added, higher priority rules. 
- -Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain whenever possible.") -Signed-off-by: Dumitru Ceara -Signed-off-by: Numan Siddique -(cherry picked from upstream commit 598a07cd240d7d01de3d7f04ca7abc58a33977a1) - -Change-Id: I4519f441245a5f1ecf7da73257f97aff0c7bc967 ---- - tests/ovn.at | 33 --------------------------------- - 1 file changed, 33 deletions(-) - -diff --git a/tests/ovn.at b/tests/ovn.at -index 4baf2e9..9a44f0a 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -18015,9 +18015,6 @@ r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding rtr1) - r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr1) - r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr2) - --mc_key=$(ovn-sbctl --bare --columns tunnel_key find multicast_group datapath=${sw_dp_uuid} name="_MC_flood") --mc_key=$(printf "%04x" $mc_key) -- - match_sw_metadata="metadata=0x${sw_dp_key}" - match_r1_metadata="metadata=0x${r1_dp_key}" - -@@ -18042,11 +18039,6 @@ OVS_WAIT_UNTIL([ - grep n_packets=1 -c) - test "0" = "${pkts_to_rtr2}" - ]) --OVS_WAIT_UNTIL([ -- pkts_flooded=$(ovs-ofctl dump-flows br-int | \ -- grep -E "${match_sw_metadata}" | grep ${mc_key} | grep -v n_packets=0 -c) -- test "0" = "${pkts_flooded}" --]) - - # Inject ND_NS for ofirst router owned IP address. - src_ipv6=00100000000000000000000000000254 -@@ -18069,11 +18061,6 @@ OVS_WAIT_UNTIL([ - grep n_packets=1 -c) - test "0" = "${pkts_to_rtr2}" - ]) --OVS_WAIT_UNTIL([ -- pkts_flooded=$(ovs-ofctl dump-flows br-int | \ -- grep -E "${match_sw_metadata}" | grep ${mc_key} | grep -v n_packets=0 -c) -- test "0" = "${pkts_flooded}" --]) - - # Configure load balancing on both routers. 
- ovn-nbctl lb-add lb1-v4 10.0.0.11 42.42.42.1 -@@ -18108,11 +18095,6 @@ OVS_WAIT_UNTIL([ - grep n_packets=1 -c) - test "0" = "${pkts_to_rtr2}" - ]) --OVS_WAIT_UNTIL([ -- pkts_flooded=$(ovs-ofctl dump-flows br-int | \ -- grep -E "${match_sw_metadata}" | grep ${mc_key} | grep -v n_packets=0 -c) -- test "0" = "${pkts_flooded}" --]) - - # Inject ND_NS for first router owned VIP address. - src_ipv6=00100000000000000000000000000254 -@@ -18135,11 +18117,6 @@ OVS_WAIT_UNTIL([ - grep n_packets=1 -c) - test "0" = "${pkts_to_rtr2}" - ]) --OVS_WAIT_UNTIL([ -- pkts_flooded=$(ovs-ofctl dump-flows br-int | \ -- grep -E "${match_sw_metadata}" | grep ${mc_key} | grep -v n_packets=0 -c) -- test "0" = "${pkts_flooded}" --]) - - # Configure NAT on both routers. - ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.111 42.42.42.1 -@@ -18175,11 +18152,6 @@ OVS_WAIT_UNTIL([ - grep n_packets=1 -c) - test "0" = "${pkts_to_rtr2}" - ]) --OVS_WAIT_UNTIL([ -- pkts_flooded=$(ovs-ofctl dump-flows br-int | \ -- grep -E "${match_sw_metadata}" | grep ${mc_key} | grep -v n_packets=0 -c) -- test "0" = "${pkts_flooded}" --]) - - # Inject ARP request for FIP1. - send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 121) -@@ -18242,11 +18214,6 @@ OVS_WAIT_UNTIL([ - grep n_packets=1 -c) - test "0" = "${pkts_to_rtr2}" - ]) --OVS_WAIT_UNTIL([ -- pkts_flooded=$(ovs-ofctl dump-flows br-int | \ -- grep -E "${match_sw_metadata}" | grep ${mc_key} | grep -v n_packets=0 -c) -- test "0" = "${pkts_flooded}" --]) - - # Inject ND_NS for FIP1. 
- src_ipv6=00100000000000000000000000000254 --- -1.8.3.1 - diff --git a/SOURCES/0001-pinctrl-Fix-icmp6-packet-corruption-issue.patch b/SOURCES/0001-pinctrl-Fix-icmp6-packet-corruption-issue.patch deleted file mode 100644 index adc0e60..0000000 --- a/SOURCES/0001-pinctrl-Fix-icmp6-packet-corruption-issue.patch +++ /dev/null @@ -1,62 +0,0 @@ -From a069dc4106bc641cbc8596e90b95308950b0bffa Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Tue, 12 May 2020 14:21:33 +0530 -Subject: [PATCH] pinctrl: Fix icmp6 packet corruption issue - -The commit f792b1a00b43("Fix ACL reject action for UDP packets.") -didn't updated the 'struct ip6_hdr' pointer after calling -dp_packet_put(), as dp_packet_put() can reallocate memory making the -old references to packet pointers invalid. - -This patch fixes this issue. - -Fixes: f792b1a00b43("Fix ACL reject action for UDP packets.") -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1834655 -Acked-by: Dumitru Ceara -Signed-off-by: Numan Siddique ---- - controller/pinctrl.c | 4 ++-- - tests/system-ovn.at | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 6b0ac3483..d976ec82b 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -1570,8 +1570,6 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow, - } - ih->icmp6_base.icmp6_cksum = 0; - -- nh = dp_packet_l3(&packet); -- - /* RFC 4443: 3.1. 
- * - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -@@ -1594,9 +1592,11 @@ pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow, - } - - dp_packet_put(&packet, in_ip, in_ip_len); -+ nh = dp_packet_l3(&packet); - nh->ip6_plen = htons(ICMP6_DATA_HEADER_LEN + in_ip_len); - - icmpv6_csum = packet_csum_pseudoheader6(dp_packet_l3(&packet)); -+ ih = dp_packet_l4(&packet); - ih->icmp6_base.icmp6_cksum = csum_finish( - csum_continue(icmpv6_csum, ih, - in_ip_len + ICMP6_DATA_HEADER_LEN)); -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 9a5ef1ec3..9ae6c6b1f 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3967,7 +3967,7 @@ OVS_WAIT_UNTIL([ - NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej udp port 90 > sw0-p1-rej-udp.pcap &], [0]) - NS_CHECK_EXEC([sw0-p1-rej], [tcpdump -n -c 1 -i sw0-p1-rej icmp > sw0-p1-rej-icmp.pcap &], [0]) - --echo "foo" > foo -+printf '.%.0s' {1..100} > foo - OVS_WAIT_UNTIL([ - ip netns exec sw0-p1-rej nc -u 10.0.0.4 90 < foo - c=$(cat sw0-p1-rej-icmp.pcap | grep \ --- -2.26.2 - diff --git a/SOURCES/0001-pinctrl-Fix-incorrect-warning-message-for-multicast-.patch b/SOURCES/0001-pinctrl-Fix-incorrect-warning-message-for-multicast-.patch new file mode 100644 index 0000000..522da2d --- /dev/null +++ b/SOURCES/0001-pinctrl-Fix-incorrect-warning-message-for-multicast-.patch @@ -0,0 +1,89 @@ +From 6a3006116945234a5ad8e88f9cf86a4c4212b1f2 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Thu, 27 Aug 2020 16:47:33 +0200 +Subject: [PATCH] pinctrl: Fix incorrect warning message for multicast querier. + +It's quite common that IP Multicast is enabled only for IPv4 or only for +IPv6. In such cases ovn-controller should not generate warnings. 
+ +Fixes: 677a3ba4d66b ("ovn: Add MLD support.") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from master commit d6d59911412edc20c40c026a1bb8c48ef01800fb) + +(cherry picked from upstream commit be614f0743abe605c4305003c9de6859dd224fe9) + +Change-Id: I9fbd75723fdd163e4129106b378f7fb71a0e6877 +--- + controller/pinctrl.c | 38 +++++++++++++++++++++++--------------- + 1 file changed, 23 insertions(+), 15 deletions(-) + +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index f72ab70..c8fbd37 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -4183,21 +4183,11 @@ ip_mcast_snoop_cfg_load(struct ip_mcast_snoop_cfg *cfg, + cfg->seq_no = ip_mcast->seq_no; + + if (querier_enabled) { +- /* Try to parse the source ETH address. */ +- if (!ip_mcast->eth_src || +- !eth_addr_from_string(ip_mcast->eth_src, +- &cfg->query_eth_src)) { +- VLOG_WARN_RL(&rl, +- "IGMP Querier enabled with invalid ETH src address"); +- /* Failed to parse the ETH source address. Disable the querier. */ +- cfg->querier_v4_enabled = false; +- cfg->querier_v6_enabled = false; +- } +- + /* Try to parse the source IPv4 address. */ + if (cfg->querier_v4_enabled) { +- if (!ip_mcast->ip4_src || +- !ip_parse(ip_mcast->ip4_src, &cfg->query_ipv4_src)) { ++ if (!ip_mcast->ip4_src || !ip_mcast->ip4_src[0]) { ++ cfg->querier_v4_enabled = false; ++ } else if (!ip_parse(ip_mcast->ip4_src, &cfg->query_ipv4_src)) { + VLOG_WARN_RL(&rl, + "IGMP Querier enabled with invalid IPv4 " + "src address"); +@@ -4215,8 +4205,9 @@ ip_mcast_snoop_cfg_load(struct ip_mcast_snoop_cfg *cfg, + + /* Try to parse the source IPv6 address. 
*/ + if (cfg->querier_v6_enabled) { +- if (!ip_mcast->ip6_src || +- !ipv6_parse(ip_mcast->ip6_src, &cfg->query_ipv6_src)) { ++ if (!ip_mcast->ip6_src || !ip_mcast->ip6_src[0]) { ++ cfg->querier_v6_enabled = false; ++ } else if (!ipv6_parse(ip_mcast->ip6_src, &cfg->query_ipv6_src)) { + VLOG_WARN_RL(&rl, + "MLD Querier enabled with invalid IPv6 " + "src address"); +@@ -4232,6 +4223,23 @@ ip_mcast_snoop_cfg_load(struct ip_mcast_snoop_cfg *cfg, + cfg->query_ipv6_dst = + (struct in6_addr)IN6ADDR_ALL_HOSTS_INIT; + } ++ ++ if (!cfg->querier_v4_enabled && !cfg->querier_v6_enabled) { ++ VLOG_WARN_RL(&rl, ++ "IGMP Querier enabled without a valid IPv4 or IPv6 " ++ "address"); ++ } ++ ++ /* Try to parse the source ETH address. */ ++ if (!ip_mcast->eth_src || ++ !eth_addr_from_string(ip_mcast->eth_src, ++ &cfg->query_eth_src)) { ++ VLOG_WARN_RL(&rl, ++ "IGMP Querier enabled with invalid ETH src address"); ++ /* Failed to parse the ETH source address. Disable the querier. */ ++ cfg->querier_v4_enabled = false; ++ cfg->querier_v6_enabled = false; ++ } + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0001-pinctrl-Handle-service-monitors-even-if-the-lport-do.patch b/SOURCES/0001-pinctrl-Handle-service-monitors-even-if-the-lport-do.patch deleted file mode 100644 index 5932a4f..0000000 --- a/SOURCES/0001-pinctrl-Handle-service-monitors-even-if-the-lport-do.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 88677e5c2051a1880db61cbb22e4ab48659bc1a8 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Fri, 17 Apr 2020 11:45:56 +0530 -Subject: [PATCH] pinctrl: Handle service monitors even if the lport doesn't - have IPv4 addresses set. - -If a logical port is only configured with MAC address(es) in the -Logical_Switch_Port.addresses, pinctrl is ignoring the service monitors -configured for that logical port. This patch allows that. 
- -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1801058 -Acked-by: Mark Michelson -Signed-off-by: Numan Siddique ---- - controller/pinctrl.c | 5 +++++ - tests/ovn.at | 31 +++++++++++++++++++++++++++++++ - tests/system-ovn.at | 4 ++-- - 3 files changed, 38 insertions(+), 2 deletions(-) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index a053938ec..8703641c2 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -5786,6 +5786,11 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, - - if (mac_found) { - break; -+ } else if (!laddrs.n_ipv4_addrs) { -+ /* IPv4 address(es) are not configured. Use the first mac. */ -+ ea = laddrs.ea; -+ mac_found = true; -+ break; - } - } - -diff --git a/tests/ovn.at b/tests/ovn.at -index 3bc435e6d..0f02e8144 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -17918,6 +17918,37 @@ AT_CHECK([cat lflows.txt], [0], [dnl - table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(drop;) - ]) - -+# Delete sw0-p1 -+ovn-nbctl lsp-del sw0-p1 -+ -+OVS_WAIT_UNTIL([test 1 = $(ovn-sbctl --bare --columns _uuid find \ -+service_monitor | sed '/^$/d' | wc -l)]) -+ -+# Add back sw0-p1 but without any IP address. -+ovn-nbctl lsp-add sw0 sw0-p1 -+ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03" -- \ -+lsp-set-port-security sw0-p1 "50:54:00:00:00:03" -+ -+OVS_WAIT_UNTIL([test 2 = $(ovn-sbctl --bare --columns status find \ -+service_monitor | grep offline | wc -l)]) -+ -+ovn-nbctl lsp-del sw0-p1 -+ovn-nbctl lsp-del sw1-p1 -+OVS_WAIT_UNTIL([test 0 = $(ovn-sbctl --bare --columns _uuid find \ -+service_monitor | sed '/^$/d' | wc -l)]) -+ -+# Add back sw0-p1 but without any address set. 
-+ovn-nbctl lsp-add sw0 sw0-p1 -+ -+OVS_WAIT_UNTIL([test 1 = $(ovn-sbctl --bare --columns _uuid find \ -+service_monitor | sed '/^$/d' | wc -l)]) -+ -+OVS_WAIT_UNTIL([test 0 = $(ovn-sbctl --bare --columns status find \ -+service_monitor | grep offline | wc -l)]) -+ -+OVS_WAIT_UNTIL([test 0 = $(ovn-sbctl --bare --columns status find \ -+service_monitor | grep online | wc -l)]) -+ - OVN_CLEANUP([hv1], [hv2]) - AT_CLEANUP - -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 000b3f13b..bdb9768d2 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3356,8 +3356,8 @@ start_daemon ovn-controller - ovn-nbctl ls-add sw0 - - ovn-nbctl lsp-add sw0 sw0-p1 --ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" --ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3" -+ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03" -+ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03" - - ovn-nbctl lsp-add sw0 sw0-p2 - ovn-nbctl lsp-set-addresses sw0-p2 "50:54:00:00:00:04 10.0.0.4" --- -2.25.1 - diff --git a/SOURCES/0001-system-tests-Fix-occasional-failure-of-the-test-Load.patch b/SOURCES/0001-system-tests-Fix-occasional-failure-of-the-test-Load.patch deleted file mode 100644 index 335fe6d..0000000 --- a/SOURCES/0001-system-tests-Fix-occasional-failure-of-the-test-Load.patch +++ /dev/null @@ -1,35 +0,0 @@ -From b1a0a1aecc5e49e465f7ff42b179f6c5ce683397 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Fri, 28 Feb 2020 22:09:43 +0530 -Subject: [PATCH 1/3] system tests: Fix occasional failure of the test - "Load - balancer health checks". - -Due to some timing issues, ovn-controller logs the warning message - -"handle service check: Service monitor not found". This can happen -if the service monitor is created in SB DB before the port for -the service is bound. This patch adds this warning message to the -WHITELIST of OVS_TRAFFIC_VSWITCHD_STOP. 
- -Signed-off-by: Numan Siddique -Acked-by: Dumitru Ceara ---- - tests/system-ovn.at | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 53da910cb..9ed3df754 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3462,7 +3462,8 @@ OVS_APP_EXIT_AND_WAIT([ovn-northd]) - - as - OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d --/connection dropped.*/d"]) -+/connection dropped.*/d -+/Service monitor not found.*/d"]) - - AT_CLEANUP - --- -2.24.1 - diff --git a/SOURCES/0001-tests-Wait-up-to-OVS_CTL_TIMEOUT-seconds.patch b/SOURCES/0001-tests-Wait-up-to-OVS_CTL_TIMEOUT-seconds.patch deleted file mode 100644 index bedc1da..0000000 --- a/SOURCES/0001-tests-Wait-up-to-OVS_CTL_TIMEOUT-seconds.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 2e84aada0b45d2f8739c2fdbc351098fc1c09c26 Mon Sep 17 00:00:00 2001 -Message-Id: <2e84aada0b45d2f8739c2fdbc351098fc1c09c26.1586727203.git.lorenzo.bianconi@redhat.com> -From: Ilya Maximets -Date: Wed, 6 Nov 2019 17:29:58 +0100 -Subject: [PATCH 1/3] tests: Wait up to OVS_CTL_TIMEOUT seconds. - -While running tests under valgrind, it could take more than 10 seconds -for process to disappear after successful 'ovs-appctl exit' command. - -Same applies to some other events that tests are waiting for with -OVS_WAIT macro. This makes tests to fail frequently under valgrind. - -Using OVS_CTL_TIMEOUT variable instead of constant 10 seconds seems -reasonable to avoid this issue because it controls timeouts of all -control utilities and needs to be adjusted while running under valgrind -anyway. 
- -Signed-off-by: Ilya Maximets -Acked-by: Aaron Conole -Signed-off-by: William Tu -Signed-off-by: Numan Siddique -Signed-off-by: Lorenzo Bianconi ---- - tests/ovs-macros.at | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at -index b2e619f76..3dcf8f96d 100644 ---- a/tests/ovs-macros.at -+++ b/tests/ovs-macros.at -@@ -208,9 +208,9 @@ ovs_wait () { - sleep 0.1 - if ovs_wait_cond; then echo "$1: wait succeeded quickly" >&AS_MESSAGE_LOG_FD; return 0; fi - -- # Then wait up to 10 seconds. -+ # Then wait up to OVS_CTL_TIMEOUT seconds. - local d -- for d in 1 2 3 4 5 6 7 8 9 10; do -+ for d in `seq 1 "$OVS_CTL_TIMEOUT"`; do - sleep 1 - if ovs_wait_cond; then echo "$1: wait succeeded after $d seconds" >&AS_MESSAGE_LOG_FD; return 0; fi - done --- -2.25.2 - diff --git a/SOURCES/0002-Add-support-for-DHCP-domain-search-option-119.patch b/SOURCES/0002-Add-support-for-DHCP-domain-search-option-119.patch new file mode 100644 index 0000000..594f6f7 --- /dev/null +++ b/SOURCES/0002-Add-support-for-DHCP-domain-search-option-119.patch @@ -0,0 +1,309 @@ +From a0f09fcde4c1ef4f2971cc4c504d939298aa7ec5 Mon Sep 17 00:00:00 2001 +From: Dhathri Purohith +Date: Wed, 24 Jun 2020 19:16:24 -0700 +Subject: [PATCH 02/22] Add support for DHCP domain search option (119) + +Domain search list is encoded according to the specifications in RFC 1035, +section 4.1.4. 
+ +Change-Id: I43d8e10f90fc64c5d8463f3d41fbaac5a2e1a929 +Signed-off-by: Dhathri Purohith +Signed-off-by: Ankur Sharma +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit d79bb92c4b4938da89aa16a6795c7bcb4e128374) +--- + lib/actions.c | 85 ++++++++++++++++++++++++++++++++++++++++++++- + lib/ovn-l7.h | 3 ++ + northd/ovn-northd.c | 1 + + ovn-nb.xml | 13 +++++++ + ovn-sb.ovsschema | 6 ++-- + ovn-sb.xml | 11 ++++++ + tests/ovn.at | 39 +++++++++++++++++++++ + tests/test-ovn.c | 1 + + 8 files changed, 155 insertions(+), 4 deletions(-) + +diff --git a/lib/actions.c b/lib/actions.c +index 616c93e8a..e4de97c23 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -2087,7 +2087,8 @@ parse_gen_opt(struct action_context *ctx, struct ovnact_gen_option *o, + return; + } + +- if (!strcmp(o->option->type, "str")) { ++ if (!strcmp(o->option->type, "str") || ++ !strcmp(o->option->type, "domains")) { + if (o->value.type != EXPR_C_STRING) { + lexer_error(ctx->lexer, "%s option %s requires string value.", + opts_type, o->option->name); +@@ -2422,6 +2423,88 @@ encode_put_dhcpv4_option(const struct ovnact_gen_option *o, + opt_header[1] = sizeof(ovs_be32); + ofpbuf_put(ofpacts, &c->value.ipv4, sizeof(ovs_be32)); + } ++ } else if (!strcmp(o->option->type, "domains")) { ++ /* Please refer to RFC 1035, section 4.1.4 for the format of encoding ++ * domain names. Below is an example for encoding a search list ++ * consisting of the "abc.com" and "xyz.abc.com". ++ * ++ * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ ++ * |119|14 | 3 |'a'|'b'|'c'| 3 |'c'|'o'|'m'| 0 |'x'|'y'|'z'|xC0|x00| ++ * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ ++ * ++ * The encoding of "abc.com" ends with 0 to mark the end of the ++ * domain name as required by RFC 1035.
++ * ++ * The encoding of "xyz" (for "xyz.abc.com") ends with the two-octet ++ * compression pointer C000 (hex), which points to offset 0 where ++ * another validly encoded domain name can be found to complete ++ * the name ("abc.com"). ++ * ++ * Encoding adds 2 bytes (one for length and one for delimiter) for ++ * every domain name that is unique. If all the domain names are unique ++ * (which probably never happens in real world), then encoded string ++ * could be longer than the original string. Just to be on the safer ++ * side, allocate the (approx.) worst case length here. ++ */ ++ uint8_t *dns_encoded = xzalloc(2 * strlen(c->string)); ++ uint16_t encode_offset = 0; ++ struct shash label_offset_map = SHASH_INITIALIZER(&label_offset_map); ++ char *domain_list = xstrdup(c->string), *dom_ptr = NULL; ++ char *suffix = xzalloc(strlen(domain_list)); ++ for (char *domain = strtok_r(domain_list, ",", &dom_ptr); ++ domain != NULL; ++ domain = strtok_r(NULL, ",", &dom_ptr)) { ++ if (strlen(domain) > DOMAIN_NAME_MAX_LEN) { ++ VLOG_WARN("Domain names longer than 255 characters are not" ++ "supported"); ++ goto out; ++ } ++ ovs_strlcpy(suffix, domain, strlen(domain)); ++ char *label; ++ for (label = strtok_r(domain, ".", &domain); ++ label != NULL; ++ label = strtok_r(NULL, ".", &domain)) { ++ /* Check if we have already encoded this suffix. ++ * If yes, fill in the reference and break. */ ++ uint16_t *get_offset; ++ get_offset = shash_find_data(&label_offset_map, suffix); ++ if (get_offset != NULL) { ++ ovs_be16 temp = htons(0xc000) | htons(*get_offset); ++ memcpy(dns_encoded + encode_offset, &temp, ++ sizeof(temp)); ++ encode_offset += sizeof(temp); ++ break; ++ } else { ++ /* The suffix was not encoded before, encode it now ++ * and add the offset to the label_offset_map. 
*/ ++ uint16_t *set_offset = xzalloc(sizeof(uint16_t)); ++ *set_offset = encode_offset; ++ shash_add_once(&label_offset_map, suffix, set_offset); ++ ++ uint8_t len = strlen(label); ++ memcpy(dns_encoded + encode_offset, &len, sizeof(uint8_t)); ++ encode_offset += sizeof(uint8_t); ++ memcpy(dns_encoded + encode_offset, label, len); ++ encode_offset += len; ++ } ++ ovs_strlcpy(suffix, domain, strlen(domain)); ++ } ++ /* Add the end marker (0 byte) to determine the end of the ++ * domain. */ ++ if (label == NULL) { ++ uint8_t end = 0; ++ memcpy(dns_encoded + encode_offset, &end, sizeof(uint8_t)); ++ encode_offset += sizeof(uint8_t); ++ } ++ } ++ opt_header[1] = encode_offset; ++ ofpbuf_put(ofpacts, dns_encoded, encode_offset); ++ ++ out: ++ free(suffix); ++ free(domain_list); ++ free(dns_encoded); ++ shash_destroy_free_data(&label_offset_map); + } + } + +diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h +index 22a2153de..cea97b9ce 100644 +--- a/lib/ovn-l7.h ++++ b/lib/ovn-l7.h +@@ -34,6 +34,7 @@ struct gen_opts_map { + size_t code; + }; + ++#define DOMAIN_NAME_MAX_LEN 255 + #define DHCP_BROADCAST_FLAG 0x8000 + + #define DHCP_OPTION(NAME, CODE, TYPE) \ +@@ -81,6 +82,8 @@ struct gen_opts_map { + #define DHCP_OPT_PATH_PREFIX DHCP_OPTION("path_prefix", 210, "str") + #define DHCP_OPT_TFTP_SERVER_ADDRESS \ + DHCP_OPTION("tftp_server_address", 150, "ipv4") ++#define DHCP_OPT_DOMAIN_SEARCH_LIST \ ++ DHCP_OPTION("domain_search_list", 119, "domains") + + #define DHCP_OPT_ARP_CACHE_TIMEOUT \ + DHCP_OPTION("arp_cache_timeout", 35, "uint32") +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 14be87435..a665d52e9 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -11761,6 +11761,7 @@ static struct gen_opts_map supported_dhcp_opts[] = { + DHCP_OPT_DOMAIN_NAME, + DHCP_OPT_ARP_CACHE_TIMEOUT, + DHCP_OPT_TCP_KEEPALIVE_INTERVAL, ++ DHCP_OPT_DOMAIN_SEARCH_LIST, + }; + + static struct gen_opts_map supported_dhcpv6_opts[] = { +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 
8d04d3d3b..0fdc1592b 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -2990,6 +2990,19 @@ +

          + + ++ ++ ++

          ++ These options accept a string value that is a comma-separated ++ list of domain names. The domain names are encoded based on RFC 1035. ++

          ++ ++ ++

          ++ The DHCPv4 option code for this option is 119. ++

          ++
          ++
          + + + +diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema +index 2ec729b77..99c5de822 100644 +--- a/ovn-sb.ovsschema ++++ b/ovn-sb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Southbound", +- "version": "2.8.1", +- "cksum": "236203406 21905", ++ "version": "2.8.2", ++ "cksum": "464326363 21916", + "tables": { + "SB_Global": { + "columns": { +@@ -218,7 +218,7 @@ + "type": "string", + "enum": ["set", ["bool", "uint8", "uint16", "uint32", + "ipv4", "static_routes", "str", +- "host_id"]]}}}}, ++ "host_id", "domains"]]}}}}, + "isRoot": true}, + "DHCPv6_Options": { + "columns": { +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 2edafd48f..293b0920c 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -3287,6 +3287,17 @@ tcp.flags = RST; +

          + + ++
          value: domains
          ++
          ++

          ++ This indicates that the value of the DHCP option is a domain name ++ or a comma-separated list of domain names. ++

          ++ ++

          ++ Example. "name=domain_search_list", "code=119", "type=domains". ++

          ++
          + + + +diff --git a/tests/ovn.at b/tests/ovn.at +index 4e98790af..be677b663 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1255,6 +1255,12 @@ reg0[15] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.255.0, + reg0[15] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.255.0,mtu=1400,ip_forward_enable=1,default_ttl=121,dns_server={8.8.8.8,7.7.7.7},classless_static_route={30.0.0.0/24,10.0.0.4,40.0.0.0/16,10.0.0.6,0.0.0.0/0,10.0.0.1},ethernet_encap=1,router_discovery=0,tftp_server="tftp_server_test"); + formats as reg0[15] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, netmask = 255.255.255.0, mtu = 1400, ip_forward_enable = 1, default_ttl = 121, dns_server = {8.8.8.8, 7.7.7.7}, classless_static_route = {30.0.0.0/24, 10.0.0.4, 40.0.0.0/16, 10.0.0.6, 0.0.0.0/0, 10.0.0.1}, ethernet_encap = 1, router_discovery = 0, tftp_server = "tftp_server_test"); + encodes as controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.6f.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.ff.00.1a.02.05.78.13.01.01.17.01.79.06.08.08.08.08.08.07.07.07.07.79.14.18.1e.00.00.0a.00.00.04.10.28.00.0a.00.00.06.00.0a.00.00.01.24.01.01.1f.01.00.42.10.74.66.74.70.5f.73.65.72.76.65.72.5f.74.65.73.74,pause) ++reg2[5] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.254.0,mtu=1400,domain_name="ovn.org",wpad="https://example.org",bootfile_name="https://127.0.0.1/boot.ipxe",path_prefix="/tftpboot",domain_search_list="ovn.org,abc.ovn.org"); ++ formats as reg2[5] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, netmask = 255.255.254.0, mtu = 1400, domain_name = "ovn.org", wpad = "https://example.org", bootfile_name = "https://127.0.0.1/boot.ipxe", path_prefix = "/tftpboot", domain_search_list = "ovn.org,abc.ovn.org"); ++ encodes as 
controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.25.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.fe.00.1a.02.05.78.0f.07.6f.76.6e.2e.6f.72.67.fc.13.68.74.74.70.73.3a.2f.2f.65.78.61.6d.70.6c.65.2e.6f.72.67.43.1b.68.74.74.70.73.3a.2f.2f.31.32.37.2e.30.2e.30.2e.31.2f.62.6f.6f.74.2e.69.70.78.65.d2.09.2f.74.66.74.70.62.6f.6f.74.77.0f.03.6f.76.6e.03.6f.72.67.00.03.61.62.63.c0.00,pause) ++reg2[5] = put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.254.0,mtu=1400,domain_name="ovn.org",wpad="https://example.org",bootfile_name="https://127.0.0.1/boot.ipxe",path_prefix="/tftpboot",domain_search_list="ovn.org,abc.ovn.org,def.ovn.org,ovn.test,def.ovn.test,test.org,abc.com"); ++ formats as reg2[5] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, netmask = 255.255.254.0, mtu = 1400, domain_name = "ovn.org", wpad = "https://example.org", bootfile_name = "https://127.0.0.1/boot.ipxe", path_prefix = "/tftpboot", domain_search_list = "ovn.org,abc.ovn.org,def.ovn.org,ovn.test,def.ovn.test,test.org,abc.com"); ++ encodes as controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.25.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.fe.00.1a.02.05.78.0f.07.6f.76.6e.2e.6f.72.67.fc.13.68.74.74.70.73.3a.2f.2f.65.78.61.6d.70.6c.65.2e.6f.72.67.43.1b.68.74.74.70.73.3a.2f.2f.31.32.37.2e.30.2e.30.2e.31.2f.62.6f.6f.74.2e.69.70.78.65.d2.09.2f.74.66.74.70.62.6f.6f.74.77.35.03.6f.76.6e.03.6f.72.67.00.03.61.62.63.c0.00.03.64.65.66.c0.00.03.6f.76.6e.04.74.65.73.74.00.03.64.65.66.c0.15.04.74.65.73.74.c0.04.03.61.62.63.03.63.6f.6d.00,pause) + + reg1[0..1] = put_dhcp_opts(offerip = 1.2.3.4, router = 10.0.0.1); + Cannot use 2-bit field reg1[0..1] where 1-bit field is required. +@@ -1272,6 +1278,8 @@ reg1[0] = put_dhcp_opts(offerip="xyzzy"); + DHCPv4 option offerip requires numeric value. + reg1[0] = put_dhcp_opts(offerip=1.2.3.4, domain_name=1.2.3.4); + DHCPv4 option domain_name requires string value. 
++reg1[0] = put_dhcp_opts(offerip=1.2.3.4, domain_search_list=1.2.3.4); ++ DHCPv4 option domain_search_list requires string value. + + # nd_ns + nd_ns { nd.target = xxreg0; output; }; +@@ -5692,6 +5700,37 @@ AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) + cat 2.expected | cut -c 53- > expout + AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) + ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ ++# Set domain search list option for ls1 ++echo "------ Set domain search list --------" ++ovn-nbctl dhcp-options-set-options $d1 server_id=10.0.0.1 \ ++server_mac=ff:10:00:00:00:01 lease_time=3600 router=10.0.0.1 \ ++domain_search_list=\"test1.com,test2.com\" ++echo "------------------------------------------" ++ ++# Send DHCPREQUEST in the SELECTING/INIT-REBOOT state with the offered IP ++# address in the Requested IP Address option. ++offer_ip=`ip_to_hex 10 0 0 6` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=`ip_to_hex 0 0 0 0` ++request_ip=$offer_ip ++expected_dhcp_opts=771305746573743103636f6d00057465737432c006330400000e100104ffffff0003040a00000136040a000001 ++test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts ++ ++# NXT_RESUMEs should be 16. ++OVS_WAIT_UNTIL([test 16 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++cat 2.expected | cut -c -48 > expout ++AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) ++# Skipping the IPv4 checksum. 
++cat 2.expected | cut -c 53- > expout ++AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) ++ + OVN_CLEANUP([hv1]) + + AT_CLEANUP +diff --git a/tests/test-ovn.c b/tests/test-ovn.c +index 29d343b60..ba628288b 100644 +--- a/tests/test-ovn.c ++++ b/tests/test-ovn.c +@@ -190,6 +190,7 @@ create_gen_opts(struct hmap *dhcp_opts, struct hmap *dhcpv6_opts, + dhcp_opt_add(dhcp_opts, "tftp_server_address", 150, "ipv4"); + dhcp_opt_add(dhcp_opts, "arp_cache_timeout", 35, "uint32"); + dhcp_opt_add(dhcp_opts, "tcp_keepalive_interval", 38, "uint32"); ++ dhcp_opt_add(dhcp_opts, "domain_search_list", 119, "domains"); + + /* DHCPv6 options. */ + hmap_init(dhcpv6_opts); +-- +2.26.2 + diff --git a/SOURCES/0002-Broadcast-DHCPREPLY-when-BROADCAST-flag-is-set.patch b/SOURCES/0002-Broadcast-DHCPREPLY-when-BROADCAST-flag-is-set.patch deleted file mode 100644 index f11664d..0000000 --- a/SOURCES/0002-Broadcast-DHCPREPLY-when-BROADCAST-flag-is-set.patch +++ /dev/null @@ -1,400 +0,0 @@ -From e982e99e5ee4eb9b65e0d3fe59d0975505ea625b Mon Sep 17 00:00:00 2001 -From: Ihar Hrachyshka -Date: Thu, 5 Mar 2020 20:44:24 -0500 -Subject: [PATCH 2/3] Broadcast DHCPREPLY when BROADCAST flag is set - -As per RFC2131, section 4.1: - A server or relay agent sending or relaying a DHCP message directly - to a DHCP client (i.e., not to a relay agent specified in the - 'giaddr' field) SHOULD examine the BROADCAST bit in the 'flags' - field. If this bit is set to 1, the DHCP message SHOULD be sent as - an IP broadcast using an IP broadcast address (preferably 0xffffffff) - as the IP destination address and the link-layer broadcast address as - the link-layer destination address. - -This patch changes destination IP address to 255.255.255.255 when client -set BROADCAST flag in their DHCPREQUEST. Note: the offered IP address is -still part of the DHCP payload. - -While the new DHCP response is sent as a broadcast IP frame, it's -handled locally, as any other DHCP reply by the native responder. 
-Meaning, the reply is sent to the client port that initiated the DHCP -session only. - -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1801006 - -Signed-off-by: Ihar Hrachyshka -Signed-off-by: Numan Siddique - -(cherry-picked from upstream commit 4f8045b3b5f2c3376f86f5edc4e3f7507c2b1148) ---- - controller/pinctrl.c | 15 +++++++ - lib/ovn-l7.h | 2 + - northd/ovn-northd.8.xml | 5 +-- - northd/ovn-northd.c | 7 ++- - tests/ovn.at | 98 +++++++++++++++++++++++++++++------------ - 5 files changed, 93 insertions(+), 34 deletions(-) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index dc8d3fd28..8bf19776c 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -966,6 +966,12 @@ pinctrl_handle_tcp_reset(struct rconn *swconn, const struct flow *ip_flow, - dp_packet_uninit(&packet); - } - -+static bool -+is_dhcp_flags_broadcast(ovs_be16 flags) -+{ -+ return flags & htons(DHCP_BROADCAST_FLAG); -+} -+ - /* Called with in the pinctrl_handler thread context. */ - static void - pinctrl_handle_put_dhcp_opts( -@@ -1190,7 +1196,16 @@ pinctrl_handle_put_dhcp_opts( - - udp->udp_len = htons(new_l4_size); - -+ /* Send a broadcast IP frame when BROADCAST flag is set. */ - struct ip_header *out_ip = dp_packet_l3(&pkt_out); -+ ovs_be32 ip_dst; -+ if (!is_dhcp_flags_broadcast(dhcp_data->flags)) { -+ ip_dst = *offer_ip; -+ } else { -+ ip_dst = htonl(0xffffffff); -+ } -+ put_16aligned_be32(&out_ip->ip_dst, ip_dst); -+ - out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs + new_l4_size); - udp->udp_csum = 0; - /* Checksum needs to be initialized to zero. 
*/ -diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h -index f20d86c39..931e6ffcf 100644 ---- a/lib/ovn-l7.h -+++ b/lib/ovn-l7.h -@@ -34,6 +34,8 @@ struct gen_opts_map { - size_t code; - }; - -+#define DHCP_BROADCAST_FLAG 0x8000 -+ - #define DHCP_OPTION(NAME, CODE, TYPE) \ - {.name = NAME, .code = CODE, .type = TYPE} - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index d80f8cf8d..b6cfa3e90 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -937,7 +937,6 @@ next; -
          - eth.dst = eth.src;
          - eth.src = E;
          --ip4.dst = A;
          - ip4.src = S;
          - udp.src = 67;
          - udp.dst = 68;
          -@@ -948,8 +947,8 @@ output;
          - 
          -         

          - where E is the server MAC address and S is the -- server IPv4 address defined in the DHCPv4 options and A is -- the IPv4 address defined in the logical port's addresses column. -+ server IPv4 address defined in the DHCPv4 options. Note that -+ ip4.dst field is handled by put_dhcp_opts. -

          - -

          -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 0d43322cf..217a8c894 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -4276,10 +4276,9 @@ build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip, - ds_put_cstr(options_action, "); next;"); - - ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; " -- "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; " -- "udp.dst = 68; outport = inport; flags.loopback = 1; " -- "output;", -- server_mac, IP_ARGS(offer_ip), server_ip); -+ "ip4.src = %s; udp.src = 67; udp.dst = 68; " -+ "outport = inport; flags.loopback = 1; output;", -+ server_mac, server_ip); - - ds_put_format(ipv4_addr_match, - "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}", -diff --git a/tests/ovn.at b/tests/ovn.at -index a04f22c4c..8de4b5ceb 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -4595,10 +4595,11 @@ sleep 2 - as hv1 ovs-vsctl show - - # This shell function sends a DHCP request packet --# test_dhcp INPORT SRC_MAC DHCP_TYPE OFFER_IP REQUEST_IP ... -+# test_dhcp INPORT SRC_MAC DHCP_TYPE BROADCAST CIADDR OFFER_IP REQUEST_IP USE_IP ... 
- test_dhcp() { -- local inport=$1 src_mac=$2 dhcp_type=$3 ciaddr=$4 offer_ip=$5 request_ip=$6 use_ip=$7 -- shift; shift; shift; shift; shift; shift; shift; -+ local inport=$1 src_mac=$2 dhcp_type=$3 broadcast=$4 ciaddr=$5 offer_ip=$6 request_ip=$7 use_ip=$8 -+ shift; shift; shift; shift; shift; shift; shift; shift; -+ - if test $use_ip != 0; then - src_ip=$1 - dst_ip=$2 -@@ -4607,6 +4608,7 @@ test_dhcp() { - src_ip=`ip_to_hex 0 0 0 0` - dst_ip=`ip_to_hex 255 255 255 255` - fi -+ - if test $request_ip != 0; then - ip_len=0120 - udp_len=010b -@@ -4614,10 +4616,19 @@ test_dhcp() { - ip_len=011a - udp_len=0106 - fi -+ -+ if test $broadcast != 0; then -+ flags=8000 -+ reply_dst_ip=`ip_to_hex 255 255 255 255` -+ else -+ flags=0000 -+ reply_dst_ip=${offer_ip} -+ fi -+ - local request=ffffffffffff${src_mac}08004510${ip_len}0000000080110000${src_ip}${dst_ip} - # udp header and dhcp header - request=${request}00440043${udp_len}0000 -- request=${request}010106006359aa7600000000${ciaddr}000000000000000000000000${src_mac} -+ request=${request}010106006359aa760000${flags}${ciaddr}000000000000000000000000${src_mac} - # client hardware padding - request=${request}00000000000000000000 - # server hostname -@@ -4655,10 +4666,10 @@ test_dhcp() { - ip_len=$(printf "%x" $ip_len) - udp_len=$(printf "%x" $udp_len) - # $ip_len var will be in 3 digits i.e 134. So adding a '0' before $ip_len -- local reply=${src_mac}${srv_mac}080045100${ip_len}000000008011XXXX${srv_ip}${offer_ip} -+ local reply=${src_mac}${srv_mac}080045100${ip_len}000000008011XXXX${srv_ip}${reply_dst_ip} - # udp header and dhcp header. - # $udp_len var will be in 3 digits. 
So adding a '0' before $udp_len -- reply=${reply}004300440${udp_len}0000020106006359aa7600000000${ciaddr} -+ reply=${reply}004300440${udp_len}0000020106006359aa760000${flags}${ciaddr} - # your ip address; 0 for NAK - if test $dhcp_reply_type = 06; then - reply=${reply}00000000 -@@ -4729,7 +4740,7 @@ server_ip=`ip_to_hex 10 0 0 1` - ciaddr=`ip_to_hex 0 0 0 0` - request_ip=0 - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 1 f00000000001 01 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 02 $expected_dhcp_opts -+test_dhcp 1 f00000000001 01 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 02 $expected_dhcp_opts - - # NXT_RESUMEs should be 1. - OVS_WAIT_UNTIL([test 1 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4755,7 +4766,7 @@ server_ip=`ip_to_hex 10 0 0 1` - ciaddr=`ip_to_hex 0 0 0 0` - request_ip=$offer_ip - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 2 f00000000002 03 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts -+test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts - - # NXT_RESUMEs should be 2. - OVS_WAIT_UNTIL([test 2 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4779,7 +4790,7 @@ server_ip=`ip_to_hex 10 0 0 1` - ciaddr=`ip_to_hex 0 0 0 0` - request_ip=`ip_to_hex 10 0 0 7` - expected_dhcp_opts="" --test_dhcp 2 f00000000002 03 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 06 $expected_dhcp_opts -+test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 06 $expected_dhcp_opts - - # NXT_RESUMEs should be 3. 
- OVS_WAIT_UNTIL([test 3 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4803,7 +4814,7 @@ rm -f 2.expected - ciaddr=`ip_to_hex 0 0 0 0` - offer_ip=0 - request_ip=0 --test_dhcp 2 f00000000002 08 $ciaddr $offer_ip $request_ip 0 1 1 -+test_dhcp 2 f00000000002 08 0 $ciaddr $offer_ip $request_ip 0 1 1 - - # NXT_RESUMEs should be 4. - OVS_WAIT_UNTIL([test 4 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4820,12 +4831,12 @@ rm -f 2.expected - # ls2-lp2 (vif4-tx.pcap) should receive the DHCPv4 request packet once. - - ciaddr=`ip_to_hex 0 0 0 0` --test_dhcp 3 f00000000003 01 $ciaddr 0 0 4 0 -+test_dhcp 3 f00000000003 01 0 $ciaddr 0 0 4 0 - - # Send DHCPv4 packet on ls2-lp2. "router" DHCPv4 option is not defined for - # this lport. - ciaddr=`ip_to_hex 0 0 0 0` --test_dhcp 4 f00000000004 01 $ciaddr 0 0 3 0 -+test_dhcp 4 f00000000004 01 0 $ciaddr 0 0 3 0 - - # NXT_RESUMEs should be 4. - OVS_WAIT_UNTIL([test 4 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4842,7 +4853,7 @@ request_ip=0 - src_ip=$offer_ip - dst_ip=$server_ip - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 2 f00000000002 03 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts -+test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts - - # NXT_RESUMEs should be 5. - OVS_WAIT_UNTIL([test 5 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4868,7 +4879,7 @@ request_ip=0 - src_ip=$offer_ip - dst_ip=`ip_to_hex 255 255 255 255` - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 2 f00000000002 03 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts -+test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts - - # NXT_RESUMEs should be 6. 
- OVS_WAIT_UNTIL([test 6 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4894,7 +4905,7 @@ request_ip=0 - src_ip=$offer_ip - dst_ip=`ip_to_hex 255 255 255 255` - expected_dhcp_opts="" --test_dhcp 2 f00000000002 03 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 06 $expected_dhcp_opts -+test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 06 $expected_dhcp_opts - - # NXT_RESUMEs should be 7. - OVS_WAIT_UNTIL([test 7 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4920,7 +4931,7 @@ request_ip=0 - src_ip=$offer_ip - dst_ip=`ip_to_hex 255 255 255 255` - expected_dhcp_opts="" --test_dhcp 2 f00000000002 03 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 06 $expected_dhcp_opts -+test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 06 $expected_dhcp_opts - - # NXT_RESUMEs should be 8. - OVS_WAIT_UNTIL([test 8 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4942,7 +4953,7 @@ rm -f 2.expected - ciaddr=`ip_to_hex 0 0 0 0` - src_ip=`ip_to_hex 10 0 0 6` - dst_ip=`ip_to_hex 10 0 0 4` --test_dhcp 2 f00000000002 03 $ciaddr 0 0 1 $src_ip $dst_ip 1 -+test_dhcp 2 f00000000002 03 0 $ciaddr 0 0 1 $src_ip $dst_ip 1 - - # NXT_RESUMEs should be 8. - OVS_WAIT_UNTIL([test 8 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -@@ -4950,6 +4961,29 @@ OVS_WAIT_UNTIL([test 8 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) - # vif1-tx.pcap should have received the DHCPv4 request packet - OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected]) - -+reset_pcap_file hv1-vif1 hv1/vif1 -+reset_pcap_file hv1-vif2 hv1/vif2 -+rm -f 1.expected -+rm -f 2.expected -+ -+# Send DHCPDISCOVER with BROADCAST flag on. 
-+offer_ip=`ip_to_hex 10 0 0 4` -+server_ip=`ip_to_hex 10 0 0 1` -+ciaddr=`ip_to_hex 0 0 0 0` -+request_ip=0 -+expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 -+test_dhcp 1 f00000000001 01 1 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 02 $expected_dhcp_opts -+ -+# NXT_RESUMEs should be 9. -+OVS_WAIT_UNTIL([test 9 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) -+ -+$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > 1.packets -+cat 1.expected | cut -c -48 > expout -+AT_CHECK([cat 1.packets | cut -c -48], [0], [expout]) -+# Skipping the IPv4 checksum. -+cat 1.expected | cut -c 53- > expout -+AT_CHECK([cat 1.packets | cut -c 53-], [0], [expout]) -+ - OVN_CLEANUP([hv1]) - - AT_CLEANUP -@@ -13220,10 +13254,11 @@ as hv1 - ovs-vsctl show - - # This shell function sends a DHCP request packet --# test_dhcp INPORT SRC_MAC DHCP_TYPE OFFER_IP ... -+# test_dhcp INPORT SRC_MAC DHCP_TYPE BROADCAST OFFER_IP ... - test_dhcp() { -- local inport=$1 src_mac=$2 dhcp_type=$3 offer_ip=$4 use_ip=$5 -- shift; shift; shift; shift; shift; -+ local inport=$1 src_mac=$2 dhcp_type=$3 broadcast=$4 offer_ip=$5 use_ip=$6 -+ shift; shift; shift; shift; shift; shift; -+ - if test $use_ip != 0; then - src_ip=$1 - dst_ip=$2 -@@ -13232,10 +13267,19 @@ test_dhcp() { - src_ip=`ip_to_hex 0 0 0 0` - dst_ip=`ip_to_hex 255 255 255 255` - fi -+ -+ if test $broadcast != 0; then -+ flags=8000 -+ reply_dst_ip=`ip_to_hex 255 255 255 255` -+ else -+ flags=0000 -+ reply_dst_ip=${offer_ip} -+ fi -+ - local request=ffffffffffff${src_mac}0800451001100000000080110000${src_ip}${dst_ip} - # udp header and dhcp header - request=${request}0044004300fc0000 -- request=${request}010106006359aa760000000000000000000000000000000000000000${src_mac} -+ request=${request}010106006359aa760000${flags}00000000000000000000000000000000${src_mac} - # client hardware padding - request=${request}00000000000000000000 - # server hostname -@@ -13259,10 +13303,10 @@ test_dhcp() { - ip_len=$(printf 
"%x" $ip_len) - udp_len=$(printf "%x" $udp_len) - # $ip_len var will be in 3 digits i.e 134. So adding a '0' before $ip_len -- local reply=${src_mac}${srv_mac}080045100${ip_len}000000008011XXXX${srv_ip}${offer_ip} -+ local reply=${src_mac}${srv_mac}080045100${ip_len}000000008011XXXX${srv_ip}${reply_dst_ip} - # udp header and dhcp header. - # $udp_len var will be in 3 digits. So adding a '0' before $udp_len -- reply=${reply}004300440${udp_len}0000020106006359aa760000000000000000 -+ reply=${reply}004300440${udp_len}0000020106006359aa760000${flags}00000000 - # your ip address - reply=${reply}${offer_ip} - # next server ip address, relay agent ip address, client mac address -@@ -13381,7 +13425,7 @@ offer_ip=`ip_to_hex 10 0 0 6` - server_ip=`ip_to_hex 10 0 0 1` - server_mac=ff1000000001 - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 1 f00000000003 01 $offer_ip 0 $server_mac $server_ip \ -+test_dhcp 1 f00000000003 01 0 $offer_ip 0 $server_mac $server_ip \ - $expected_dhcp_opts - - # NXT_RESUMEs should be 1 in hv1. -@@ -13479,7 +13523,7 @@ offer_ip=`ip_to_hex 10 0 0 6` - server_ip=`ip_to_hex 10 0 0 1` - server_mac=ff1000000001 - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 1 f00000000003 01 $offer_ip 0 $server_mac $server_ip \ -+test_dhcp 1 f00000000003 01 0 $offer_ip 0 $server_mac $server_ip \ - $expected_dhcp_opts - - # NXT_RESUMEs should be 2 in hv1. -@@ -13589,7 +13633,7 @@ offer_ip=`ip_to_hex 10 0 0 6` - server_ip=`ip_to_hex 10 0 0 1` - server_mac=ff1000000001 - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 1 f00000000003 01 $offer_ip 0 $server_mac $server_ip \ -+test_dhcp 1 f00000000003 01 0 $offer_ip 0 $server_mac $server_ip \ - $expected_dhcp_opts - - # NXT_RESUMEs should be 3 in hv1. 
-@@ -13669,7 +13713,7 @@ offer_ip=`ip_to_hex 10 0 0 6` - server_ip=`ip_to_hex 10 0 0 1` - server_mac=ff1000000001 - expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a000001 --test_dhcp 1 f00000000003 01 $offer_ip 0 $server_mac $server_ip \ -+test_dhcp 1 f00000000003 01 0 $offer_ip 0 $server_mac $server_ip \ - $expected_dhcp_opts - - # NXT_RESUMEs should be 4 in hv1. --- -2.24.1 - diff --git a/SOURCES/0002-Create-daemon-pidfiles-in-ovn-run-dir.patch b/SOURCES/0002-Create-daemon-pidfiles-in-ovn-run-dir.patch deleted file mode 100644 index 45129b5..0000000 --- a/SOURCES/0002-Create-daemon-pidfiles-in-ovn-run-dir.patch +++ /dev/null @@ -1,351 +0,0 @@ -From d80be07ca318603524508402e044474571c1f642 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Thu, 23 Apr 2020 12:53:23 +0530 -Subject: [PATCH 2/2] Create daemon pidfiles in ovn run dir. - -If an OVN service is started with --pidfile option, the pidfile -is created in the ovs rundir. This patch fixes it by using the ovn rundir -if either the pidfile is not specified or if specified, it is not -absolute path. 
- -Signed-off-by: Numan Siddique -Acked-by: Dumitru Ceara -Signed-off-by: Mark Michelson ---- - controller-vtep/ovn-controller-vtep.c | 6 +-- - controller/ovn-controller.c | 6 +-- - ic/ovn-ic.c | 6 +-- - lib/ovn-util.c | 26 ++++++++++++ - lib/ovn-util.h | 60 +++++++++++++++++++++++++++ - northd/ovn-northd.c | 6 +-- - utilities/ovn-nbctl.c | 10 ++--- - utilities/ovn-trace.c | 6 +-- - 8 files changed, 106 insertions(+), 20 deletions(-) - -diff --git a/controller-vtep/ovn-controller-vtep.c b/controller-vtep/ovn-controller-vtep.c -index 253a709ab..c13280bc0 100644 ---- a/controller-vtep/ovn-controller-vtep.c -+++ b/controller-vtep/ovn-controller-vtep.c -@@ -169,7 +169,7 @@ parse_options(int argc, char *argv[]) - OPT_PEER_CA_CERT = UCHAR_MAX + 1, - OPT_BOOTSTRAP_CA_CERT, - VLOG_OPTION_ENUMS, -- DAEMON_OPTION_ENUMS, -+ OVN_DAEMON_OPTION_ENUMS, - SSL_OPTION_ENUMS, - }; - -@@ -179,7 +179,7 @@ parse_options(int argc, char *argv[]) - {"help", no_argument, NULL, 'h'}, - {"version", no_argument, NULL, 'V'}, - VLOG_LONG_OPTIONS, -- DAEMON_LONG_OPTIONS, -+ OVN_DAEMON_LONG_OPTIONS, - STREAM_SSL_LONG_OPTIONS, - {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT}, - {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, -@@ -212,7 +212,7 @@ parse_options(int argc, char *argv[]) - exit(EXIT_SUCCESS); - - VLOG_OPTION_HANDLERS -- DAEMON_OPTION_HANDLERS -+ OVN_DAEMON_OPTION_HANDLERS - STREAM_SSL_OPTION_HANDLERS - - case OPT_PEER_CA_CERT: -diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c -index 4d21ba0fd..6ff897325 100644 ---- a/controller/ovn-controller.c -+++ b/controller/ovn-controller.c -@@ -2268,7 +2268,7 @@ parse_options(int argc, char *argv[]) - OPT_PEER_CA_CERT = UCHAR_MAX + 1, - OPT_BOOTSTRAP_CA_CERT, - VLOG_OPTION_ENUMS, -- DAEMON_OPTION_ENUMS, -+ OVN_DAEMON_OPTION_ENUMS, - SSL_OPTION_ENUMS, - }; - -@@ -2276,7 +2276,7 @@ parse_options(int argc, char *argv[]) - {"help", no_argument, NULL, 'h'}, - {"version", no_argument, NULL, 'V'}, 
- VLOG_LONG_OPTIONS, -- DAEMON_LONG_OPTIONS, -+ OVN_DAEMON_LONG_OPTIONS, - STREAM_SSL_LONG_OPTIONS, - {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT}, - {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, -@@ -2301,7 +2301,7 @@ parse_options(int argc, char *argv[]) - exit(EXIT_SUCCESS); - - VLOG_OPTION_HANDLERS -- DAEMON_OPTION_HANDLERS -+ OVN_DAEMON_OPTION_HANDLERS - STREAM_SSL_OPTION_HANDLERS - - case OPT_PEER_CA_CERT: -diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c -index d931ca50f..a1ed25623 100644 ---- a/ic/ovn-ic.c -+++ b/ic/ovn-ic.c -@@ -1461,7 +1461,7 @@ static void - parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - { - enum { -- DAEMON_OPTION_ENUMS, -+ OVN_DAEMON_OPTION_ENUMS, - VLOG_OPTION_ENUMS, - SSL_OPTION_ENUMS, - }; -@@ -1474,7 +1474,7 @@ parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - {"help", no_argument, NULL, 'h'}, - {"options", no_argument, NULL, 'o'}, - {"version", no_argument, NULL, 'V'}, -- DAEMON_LONG_OPTIONS, -+ OVN_DAEMON_LONG_OPTIONS, - VLOG_LONG_OPTIONS, - STREAM_SSL_LONG_OPTIONS, - {NULL, 0, NULL, 0}, -@@ -1490,7 +1490,7 @@ parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - } - - switch (c) { -- DAEMON_OPTION_HANDLERS; -+ OVN_DAEMON_OPTION_HANDLERS; - VLOG_OPTION_HANDLERS; - STREAM_SSL_OPTION_HANDLERS; - -diff --git a/lib/ovn-util.c b/lib/ovn-util.c -index 1b30c2e9a..3482edb8d 100644 ---- a/lib/ovn-util.c -+++ b/lib/ovn-util.c -@@ -15,6 +15,7 @@ - #include - #include - -+#include "daemon.h" - #include "ovn-util.h" - #include "ovn-dirs.h" - #include "openvswitch/vlog.h" -@@ -394,6 +395,31 @@ get_abs_unix_ctl_path(const char *path) - return abs_path; - } - -+void -+ovn_set_pidfile(const char *name) -+{ -+ char *pidfile_name = NULL; -+ -+#ifndef _WIN32 -+ pidfile_name = name ? 
abs_file_name(ovn_rundir(), name) -+ : xasprintf("%s/%s.pid", ovn_rundir(), program_name); -+#else -+ if (name) { -+ if (strchr(name, ':')) { -+ pidfile_name = xstrdup(name); -+ } else { -+ pidfile_name = xasprintf("%s/%s", ovn_rundir(), name); -+ } -+ } else { -+ pidfile_name = xasprintf("%s/%s.pid", ovn_rundir(), program_name); -+ } -+#endif -+ -+ /* Call openvswitch lib function. */ -+ set_pidfile(pidfile_name); -+ free(pidfile_name); -+} -+ - /* l3gateway, chassisredirect, and patch - * are not in this list since they are - * only set in the SB DB by northd -diff --git a/lib/ovn-util.h b/lib/ovn-util.h -index 4076e8b9a..ec5f2cf5a 100644 ---- a/lib/ovn-util.h -+++ b/lib/ovn-util.h -@@ -112,6 +112,7 @@ uint32_t ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min, - uint32_t max, uint32_t *hint); - - char *ovn_chassis_redirect_name(const char *port_name); -+void ovn_set_pidfile(const char *name); - - /* An IPv4 or IPv6 address */ - struct v46_ip { -@@ -129,4 +130,63 @@ bool ip46_equals(const struct v46_ip *addr1, const struct v46_ip *addr2); - * Caller must free the returned string. - */ - char *str_tolower(const char *orig); -+ -+/* OVN daemon options. Taken from ovs/lib/daemon.h. 
*/ -+#define OVN_DAEMON_OPTION_ENUMS \ -+ OVN_OPT_DETACH, \ -+ OVN_OPT_NO_SELF_CONFINEMENT, \ -+ OVN_OPT_NO_CHDIR, \ -+ OVN_OPT_OVERWRITE_PIDFILE, \ -+ OVN_OPT_PIDFILE, \ -+ OVN_OPT_MONITOR, \ -+ OVN_OPT_USER_GROUP -+ -+#define OVN_DAEMON_LONG_OPTIONS \ -+ {"detach", no_argument, NULL, OVN_OPT_DETACH}, \ -+ {"no-self-confinement", no_argument, NULL, \ -+ OVN_OPT_NO_SELF_CONFINEMENT}, \ -+ {"no-chdir", no_argument, NULL, OVN_OPT_NO_CHDIR}, \ -+ {"pidfile", optional_argument, NULL, OVN_OPT_PIDFILE}, \ -+ {"overwrite-pidfile", no_argument, NULL, OVN_OPT_OVERWRITE_PIDFILE}, \ -+ {"monitor", no_argument, NULL, OVN_OPT_MONITOR}, \ -+ {"user", required_argument, NULL, OVN_OPT_USER_GROUP} -+ -+#define OVN_DAEMON_OPTION_HANDLERS \ -+ case OVN_OPT_DETACH: \ -+ set_detach(); \ -+ break; \ -+ \ -+ case OVN_OPT_NO_SELF_CONFINEMENT: \ -+ daemon_disable_self_confinement(); \ -+ break; \ -+ \ -+ case OVN_OPT_NO_CHDIR: \ -+ set_no_chdir(); \ -+ break; \ -+ \ -+ case OVN_OPT_PIDFILE: \ -+ ovn_set_pidfile(optarg); \ -+ break; \ -+ \ -+ case OVN_OPT_OVERWRITE_PIDFILE: \ -+ ignore_existing_pidfile(); \ -+ break; \ -+ \ -+ case OVN_OPT_MONITOR: \ -+ daemon_set_monitor(); \ -+ break; \ -+ \ -+ case OVN_OPT_USER_GROUP: \ -+ daemon_set_new_user(optarg); \ -+ break; -+ -+#define OVN_DAEMON_OPTION_CASES \ -+ case OVN_OPT_DETACH: \ -+ case OVN_OPT_NO_SELF_CONFINEMENT: \ -+ case OVN_OPT_NO_CHDIR: \ -+ case OVN_OPT_PIDFILE: \ -+ case OVN_OPT_OVERWRITE_PIDFILE: \ -+ case OVN_OPT_MONITOR: \ -+ case OVN_OPT_USER_GROUP: -+ - #endif -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 515722c5d..d3d481ab8 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -11607,7 +11607,7 @@ static void - parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - { - enum { -- DAEMON_OPTION_ENUMS, -+ OVN_DAEMON_OPTION_ENUMS, - VLOG_OPTION_ENUMS, - SSL_OPTION_ENUMS, - }; -@@ -11618,7 +11618,7 @@ parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - {"help", no_argument, NULL, 'h'}, 
- {"options", no_argument, NULL, 'o'}, - {"version", no_argument, NULL, 'V'}, -- DAEMON_LONG_OPTIONS, -+ OVN_DAEMON_LONG_OPTIONS, - VLOG_LONG_OPTIONS, - STREAM_SSL_LONG_OPTIONS, - {NULL, 0, NULL, 0}, -@@ -11634,7 +11634,7 @@ parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - } - - switch (c) { -- DAEMON_OPTION_HANDLERS; -+ OVN_DAEMON_OPTION_HANDLERS; - VLOG_OPTION_HANDLERS; - STREAM_SSL_OPTION_HANDLERS; - -diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c -index a88c1ddc2..3f4731d40 100644 ---- a/utilities/ovn-nbctl.c -+++ b/utilities/ovn-nbctl.c -@@ -324,7 +324,7 @@ enum { - OPT_NO_SHUFFLE_REMOTES, - OPT_BOOTSTRAP_CA_CERT, - MAIN_LOOP_OPTION_ENUMS, -- DAEMON_OPTION_ENUMS, -+ OVN_DAEMON_OPTION_ENUMS, - VLOG_OPTION_ENUMS, - TABLE_OPTION_ENUMS, - SSL_OPTION_ENUMS, -@@ -428,7 +428,7 @@ get_all_options(void) - {"version", no_argument, NULL, 'V'}, - {"unixctl", required_argument, NULL, 'u'}, - MAIN_LOOP_LONG_OPTIONS, -- DAEMON_LONG_OPTIONS, -+ OVN_DAEMON_LONG_OPTIONS, - VLOG_LONG_OPTIONS, - STREAM_SSL_LONG_OPTIONS, - {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, -@@ -460,7 +460,7 @@ has_option(const struct ovs_cmdl_parsed_option *parsed_options, size_t n, - static bool - will_detach(const struct ovs_cmdl_parsed_option *parsed_options, size_t n) - { -- return has_option(parsed_options, n, OPT_DETACH); -+ return has_option(parsed_options, n, OVN_OPT_DETACH); - } - - static char * OVS_WARN_UNUSED_RESULT -@@ -547,7 +547,7 @@ apply_options_direct(const struct ovs_cmdl_parsed_option *parsed_options, - printf("DB Schema %s\n", nbrec_get_db_version()); - exit(EXIT_SUCCESS); - -- DAEMON_OPTION_HANDLERS -+ OVN_DAEMON_OPTION_HANDLERS - VLOG_OPTION_HANDLERS - TABLE_OPTION_HANDLERS(&table_style) - STREAM_SSL_OPTION_HANDLERS -@@ -6512,7 +6512,7 @@ nbctl_client(const char *socket_name, - case OPT_NO_SHUFFLE_REMOTES: - case OPT_BOOTSTRAP_CA_CERT: - STREAM_SSL_CASES -- DAEMON_OPTION_CASES -+ OVN_DAEMON_OPTION_CASES - VLOG_INFO("using 
ovn-nbctl daemon, ignoring %s option", - po->o->name); - break; -diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index eae9622d3..c9d72285c 100644 ---- a/utilities/ovn-trace.c -+++ b/utilities/ovn-trace.c -@@ -239,7 +239,7 @@ parse_options(int argc, char *argv[]) - OPT_CT, - OPT_FRIENDLY_NAMES, - OPT_NO_FRIENDLY_NAMES, -- DAEMON_OPTION_ENUMS, -+ OVN_DAEMON_OPTION_ENUMS, - SSL_OPTION_ENUMS, - VLOG_OPTION_ENUMS, - OPT_LB_DST, -@@ -260,7 +260,7 @@ parse_options(int argc, char *argv[]) - {"version", no_argument, NULL, 'V'}, - {"lb-dst", required_argument, NULL, OPT_LB_DST}, - {"select-id", required_argument, NULL, OPT_SELECT_ID}, -- DAEMON_LONG_OPTIONS, -+ OVN_DAEMON_LONG_OPTIONS, - VLOG_LONG_OPTIONS, - STREAM_SSL_LONG_OPTIONS, - {NULL, 0, NULL, 0}, -@@ -333,7 +333,7 @@ parse_options(int argc, char *argv[]) - printf("DB Schema %s\n", sbrec_get_db_version()); - exit(EXIT_SUCCESS); - -- DAEMON_OPTION_HANDLERS -+ OVN_DAEMON_OPTION_HANDLERS - VLOG_OPTION_HANDLERS - STREAM_SSL_OPTION_HANDLERS - --- -2.25.1 - diff --git a/SOURCES/0002-Honour-router_preference-for-solicited-RA.patch b/SOURCES/0002-Honour-router_preference-for-solicited-RA.patch deleted file mode 100644 index a73b677..0000000 --- a/SOURCES/0002-Honour-router_preference-for-solicited-RA.patch +++ /dev/null @@ -1,209 +0,0 @@ -From 17d3f4f18878ef706008575cd1565745c5936819 Mon Sep 17 00:00:00 2001 -From: Gabriele Cerami -Date: Sat, 13 Jun 2020 10:20:23 +0100 -Subject: [PATCH 2/3] Honour router_preference for solicited RA - -Replies to router solicitation follow a different flow than periodic RA. -This flow currently does not honour the router_preference configuration. 
- -This patch modifies the flow to honour the flag, and send -router-preference indications in the reply RA following RFC4191 -specifications - -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1804576 -Signed-off-by: Gabriele Cerami -Signed-off-by: Numan Siddique ---- - AUTHORS.rst | 1 + - lib/actions.c | 29 +++++++++++++++++++++++++++-- - lib/ovn-l7.h | 5 +++++ - northd/ovn-northd.c | 6 ++++++ - tests/ovn.at | 26 ++++++++++++++++---------- - 5 files changed, 55 insertions(+), 12 deletions(-) - -diff --git a/AUTHORS.rst b/AUTHORS.rst -index c80fc1bae..bba0d1d6f 100644 ---- a/AUTHORS.rst -+++ b/AUTHORS.rst -@@ -145,6 +145,7 @@ Frédéric Tobias Christ fchrist@live.de - Frode Nordahl frode.nordahl@gmail.com - FUJITA Tomonori fujita.tomonori@lab.ntt.co.jp - Gabe Beged-Dov gabe@begeddov.com -+Gabriele Cerami gcerami@redhat.com - Gaetano Catalli gaetano.catalli@gmail.com - Gal Sagie gal.sagie@gmail.com - Genevieve LEsperance glesperance@pivotal.io -diff --git a/lib/actions.c b/lib/actions.c -index ee7ccae0d..3181126e6 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -2484,6 +2484,12 @@ parse_put_nd_ra_opts(struct action_context *ctx, const struct expr_field *dst, - } - break; - -+ case ND_RA_FLAG_PRF: -+ ok = (c->string && (!strcmp(c->string, "MEDIUM") || -+ !strcmp(c->string, "HIGH") || -+ !strcmp(c->string, "LOW"))); -+ break; -+ - case ND_OPT_SOURCE_LINKADDR: - ok = c->format == LEX_F_ETHERNET; - slla_present = true; -@@ -2538,9 +2544,22 @@ encode_put_nd_ra_option(const struct ovnact_gen_option *o, - { - struct ovs_ra_msg *ra = ofpbuf_at(ofpacts, ra_offset, sizeof *ra); - if (!strcmp(c->string, "dhcpv6_stateful")) { -- ra->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG; -+ ra->mo_flags |= IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG; - } else if (!strcmp(c->string, "dhcpv6_stateless")) { -- ra->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG; -+ ra->mo_flags |= IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG; -+ } -+ break; -+ } -+ -+ case ND_RA_FLAG_PRF: -+ { -+ struct ovs_ra_msg 
*ra = ofpbuf_at(ofpacts, ra_offset, sizeof *ra); -+ if (!strcmp(c->string, "LOW")) { -+ ra->mo_flags |= IPV6_ND_RA_OPT_PRF_LOW; -+ } else if (!strcmp(c->string, "HIGH")) { -+ ra->mo_flags |= IPV6_ND_RA_OPT_PRF_HIGH; -+ } else { -+ ra->mo_flags |= IPV6_ND_RA_OPT_PRF_NORMAL; - } - break; - } -@@ -2621,6 +2640,12 @@ encode_PUT_ND_RA_OPTS(const struct ovnact_put_opts *po, - encode_put_nd_ra_option(o, ofpacts, ra_offset); - } - -+ /* RFC4191 section 2.2 */ -+ struct ovs_ra_msg *new_ra = ofpbuf_at(ofpacts, ra_offset, sizeof *new_ra); -+ if (ntohs(new_ra->router_lifetime) == 0) { -+ new_ra->mo_flags &= IPV6_ND_RA_OPT_PRF_RESET_MASK; -+ } -+ - encode_finish_controller_op(oc_offset, ofpacts); - } - -diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h -index cbea2a0c8..2da38fb65 100644 ---- a/lib/ovn-l7.h -+++ b/lib/ovn-l7.h -@@ -304,6 +304,9 @@ nd_ra_opts_destroy(struct hmap *nd_ra_opts) - - - #define ND_RA_FLAG_ADDR_MODE 0 -+/* all small numbers seems to be all already taken but nothing guarantees this -+ * code will not be assigned by IANA to another option */ -+#define ND_RA_FLAG_PRF 255 - - - /* Default values of various IPv6 Neighbor Discovery protocol options and -@@ -325,11 +328,13 @@ nd_ra_opts_destroy(struct hmap *nd_ra_opts) - #define IPV6_ND_RA_OPT_PRF_NORMAL 0x00 - #define IPV6_ND_RA_OPT_PRF_HIGH 0x08 - #define IPV6_ND_RA_OPT_PRF_LOW 0x18 -+#define IPV6_ND_RA_OPT_PRF_RESET_MASK 0xe7 - - static inline void - nd_ra_opts_init(struct hmap *nd_ra_opts) - { - nd_ra_opt_add(nd_ra_opts, "addr_mode", ND_RA_FLAG_ADDR_MODE, "str"); -+ nd_ra_opt_add(nd_ra_opts, "router_preference", ND_RA_FLAG_PRF, "str"); - nd_ra_opt_add(nd_ra_opts, "slla", ND_OPT_SOURCE_LINKADDR, "mac"); - nd_ra_opt_add(nd_ra_opts, "prefix", ND_OPT_PREFIX_INFORMATION, "ipv6"); - nd_ra_opt_add(nd_ra_opts, "mtu", ND_OPT_MTU, "uint32"); -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index c1b4c13b7..cffe3de17 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -9408,6 +9408,12 @@ 
build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - ds_put_format(&actions, ", mtu = %u", mtu); - } - -+ const char *prf = smap_get_def( -+ &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM"); -+ if (strcmp(prf, "MEDIUM")) { -+ ds_put_format(&actions, ", router_preference = \"%s\"", prf); -+ } -+ - bool add_rs_response_flow = false; - - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { -diff --git a/tests/ovn.at b/tests/ovn.at -index 57c1d90e4..d9df393d5 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -1346,14 +1346,14 @@ log(severity=notice); - Syntax error at `;' expecting verdict. - - # put_nd_ra_opts --reg1[0] = put_nd_ra_opts(addr_mode = "slaac", mtu = 1500, prefix = aef0::/64, slla = ae:01:02:03:04:05); -- encodes as controller(userdata=00.00.00.08.00.00.00.00.00.01.de.10.00.00.00.40.86.00.00.00.ff.00.ff.ff.00.00.00.00.00.00.00.00.05.01.00.00.00.00.05.dc.03.04.40.c0.ff.ff.ff.ff.ff.ff.ff.ff.00.00.00.00.ae.f0.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.01.ae.01.02.03.04.05,pause) -+reg1[0] = put_nd_ra_opts(addr_mode = "slaac", mtu = 1500, router_preference = "HIGH", prefix = aef0::/64, slla = ae:01:02:03:04:05); -+ encodes as controller(userdata=00.00.00.08.00.00.00.00.00.01.de.10.00.00.00.40.86.00.00.00.ff.08.ff.ff.00.00.00.00.00.00.00.00.05.01.00.00.00.00.05.dc.03.04.40.c0.ff.ff.ff.ff.ff.ff.ff.ff.00.00.00.00.ae.f0.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.01.ae.01.02.03.04.05,pause) - has prereqs ip6 --reg1[0] = put_nd_ra_opts(addr_mode = "dhcpv6_stateful", slla = ae:01:02:03:04:10, mtu = 1450); -+reg1[0] = put_nd_ra_opts(addr_mode = "dhcpv6_stateful", router_preference = "MEDIUM", slla = ae:01:02:03:04:10, mtu = 1450); - encodes as controller(userdata=00.00.00.08.00.00.00.00.00.01.de.10.00.00.00.40.86.00.00.00.ff.80.ff.ff.00.00.00.00.00.00.00.00.01.01.ae.01.02.03.04.10.05.01.00.00.00.00.05.aa,pause) - has prereqs ip6 --reg1[0] = put_nd_ra_opts(addr_mode = "dhcpv6_stateless", slla = ae:01:02:03:04:06, prefix = 
aef0::/64); -- encodes as controller(userdata=00.00.00.08.00.00.00.00.00.01.de.10.00.00.00.40.86.00.00.00.ff.40.ff.ff.00.00.00.00.00.00.00.00.01.01.ae.01.02.03.04.06.03.04.40.c0.ff.ff.ff.ff.ff.ff.ff.ff.00.00.00.00.ae.f0.00.00.00.00.00.00.00.00.00.00.00.00.00.00,pause) -+reg1[0] = put_nd_ra_opts(addr_mode = "dhcpv6_stateless", router_preference = "LOW", slla = ae:01:02:03:04:06, prefix = aef0::/64); -+ encodes as controller(userdata=00.00.00.08.00.00.00.00.00.01.de.10.00.00.00.40.86.00.00.00.ff.58.ff.ff.00.00.00.00.00.00.00.00.01.01.ae.01.02.03.04.06.03.04.40.c0.ff.ff.ff.ff.ff.ff.ff.ff.00.00.00.00.ae.f0.00.00.00.00.00.00.00.00.00.00.00.00.00.00,pause) - has prereqs ip6 - reg1[0] = put_nd_ra_opts(addr_mode = "slaac", mtu = 1500, prefix = aef0::/64); - slla option not present -@@ -10083,13 +10083,16 @@ reset_pcap_file hv1-vif1 hv1/vif1 - reset_pcap_file hv1-vif2 hv1/vif2 - reset_pcap_file hv1-vif3 hv1/vif3 - --# Set the MTU to 1500 -+# Set the MTU to 1500, send_periodic to false, preference to LOW - ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:mtu=1500 -+ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:send_periodic="false" -+ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:router_preference="LOW" - - # Make sure that ovn-controller has installed the corresponding OF Flow. 
- OVS_WAIT_UNTIL([test 1 = `as hv1 ovs-ofctl dump-flows br-int | grep -c "ipv6_dst=ff02::2,nw_ttl=255,icmp_type=133,icmp_code=0"`]) - --addr_mode=00 -+# addr_mode byte also includes router preference information -+addr_mode=18 - default_prefix_option_config=030440c0ffffffffffffffff00000000 - src_mac=fa163e000003 - src_lla=fe80000000000000f8163efffe000003 -@@ -10114,12 +10117,14 @@ reset_pcap_file hv1-vif1 hv1/vif1 - reset_pcap_file hv1-vif2 hv1/vif2 - reset_pcap_file hv1-vif3 hv1/vif3 - --# Set the address mode to dhcpv6_stateful -+# Set the address mode to dhcpv6_stateful, router_preference to HIGH - ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:address_mode=dhcpv6_stateful -+ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:router_preference="HIGH" - # Make sure that ovn-controller has installed the corresponding OF Flow. - OVS_WAIT_UNTIL([test 1 = `as hv1 ovs-ofctl dump-flows br-int | grep -c "ipv6_dst=ff02::2,nw_ttl=255,icmp_type=133,icmp_code=0"`]) - --addr_mode=80 -+# addr_mode byte also includes router preference information -+addr_mode=88 - default_prefix_option_config=03044080ffffffffffffffff00000000 - src_mac=fa163e000004 - src_lla=fe80000000000000f8163efffe000004 -@@ -10144,8 +10149,9 @@ reset_pcap_file hv1-vif1 hv1/vif1 - reset_pcap_file hv1-vif2 hv1/vif2 - reset_pcap_file hv1-vif3 hv1/vif3 - --# Set the address mode to dhcpv6_stateless -+# Set the address mode to dhcpv6_stateless, reset router preference to default - ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:address_mode=dhcpv6_stateless -+ovn-nbctl --wait=hv set Logical_Router_Port lrp0 ipv6_ra_configs:router_preference="MEDIUM" - # Make sure that ovn-controller has installed the corresponding OF Flow. 
- OVS_WAIT_UNTIL([test 1 = `as hv1 ovs-ofctl dump-flows br-int | grep -c "ipv6_dst=ff02::2,nw_ttl=255,icmp_type=133,icmp_code=0"`]) - --- -2.26.2 - diff --git a/SOURCES/0002-Revert-Manage-ARP-process-locally-in-a-DVR-scenario.patch b/SOURCES/0002-Revert-Manage-ARP-process-locally-in-a-DVR-scenario.patch deleted file mode 100644 index d6bd9d1..0000000 --- a/SOURCES/0002-Revert-Manage-ARP-process-locally-in-a-DVR-scenario.patch +++ /dev/null @@ -1,186 +0,0 @@ -From d9ed450713eda62af1bec5009694b2d206c9f435 Mon Sep 17 00:00:00 2001 -Message-Id: -From: Lorenzo Bianconi -Date: Mon, 25 May 2020 23:55:06 +0200 -Subject: [PATCH ovn 1/3] Revert "Manage ARP process locally in a DVR scenario" - -This reverts commit c0bf32d72f8b893bbe3cb64912b0fd259d71555f. - -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Han Zhou ---- - northd/ovn-northd.8.xml | 37 ++-------------------------- - northd/ovn-northd.c | 53 +---------------------------------------- - tests/ovn.at | 14 ----------- - 3 files changed, 3 insertions(+), 101 deletions(-) - ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -2486,44 +2486,11 @@ output; - -

        • -

          -- For distributed logical routers where one of the logical router ports -- specifies a redirect-chassis, a priority-400 logical -- flow for each dnat_and_snat NAT rules configured. -- These flows will allow to properly forward traffic to the external -- connections if available and avoid sending it through the tunnel. -- Assuming the following NAT rule has been configured: --

          -- --
          --external_ip = A;
          --external_mac = B;
          --logical_ip = C;
          --        
          -- --

          -- the following action will be applied: --

          -- --
          --ip.ttl--;
          --reg0 = ip.dst;
          --reg1 = A;
          --eth.src = B;
          --outport = router-port;
          --next;
          --        
          -- --
        • -- --
        • --

          - IPv4 routing table. For each route to IPv4 network N with - netmask M, on router port P with IP address - A and Ethernet - address E, a logical flow with match ip4.dst == -- N/M, whose priority is 400 -- + the number of 1-bits in M if the router port is not a -- distributed gateway port, else the priority is the number of -+ N/M, whose priority is the number of - 1-bits in M, has the following actions: -

          - -@@ -2910,7 +2877,7 @@ icmp4 { -
            -
          • - For each NAT rule in the OVN Northbound database that can -- be handled in a distributed manner, a priority-200 logical -+ be handled in a distributed manner, a priority-100 logical - flow with match ip4.src == B && - outport == GW, where GW is - the logical router distributed gateway port, with actions ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -7102,8 +7102,6 @@ build_routing_policy_flow(struct hmap *l - ds_destroy(&actions); - } - --/* default logical flow prioriry for distributed routes */ --#define DROUTE_PRIO 400 - struct parsed_route { - struct ovs_list list_node; - struct v46_ip prefix; -@@ -7492,40 +7490,6 @@ build_ecmp_route_flow(struct hmap *lflow - } - - static void --add_distributed_routes(struct hmap *lflows, struct ovn_datapath *od) --{ -- struct ds actions = DS_EMPTY_INITIALIZER; -- struct ds match = DS_EMPTY_INITIALIZER; -- -- for (size_t i = 0; i < od->nbr->n_nat; i++) { -- const struct nbrec_nat *nat = od->nbr->nat[i]; -- -- if (strcmp(nat->type, "dnat_and_snat") || -- !nat->external_mac) { -- continue; -- } -- -- bool is_ipv4 = strchr(nat->logical_ip, '.') ? true : false; -- ds_put_format(&match, "ip%s.src == %s && is_chassis_resident(\"%s\")", -- is_ipv4 ? "4" : "6", nat->logical_ip, -- nat->logical_port); -- char *prefix = is_ipv4 ? "" : "xx"; -- ds_put_format(&actions, "outport = %s; eth.src = %s; " -- "%sreg0 = ip%s.dst; %sreg1 = %s; next;", -- od->l3dgw_port->json_key, nat->external_mac, -- prefix, is_ipv4 ? 
"4" : "6", -- prefix, nat->external_ip); -- ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, DROUTE_PRIO, -- ds_cstr(&match), ds_cstr(&actions)); -- ds_clear(&match); -- ds_clear(&actions); -- } -- -- ds_destroy(&actions); -- ds_destroy(&match); --} -- --static void - add_route(struct hmap *lflows, const struct ovn_port *op, - const char *lrp_addr_s, const char *network_s, int plen, - const char *gateway, bool is_src_route, -@@ -7546,12 +7510,6 @@ add_route(struct hmap *lflows, const str - } - build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4, - &match, &priority); -- /* traffic for internal IPs of logical switch ports must be sent to -- * the gw controller through the overlay tunnels -- */ -- if (op->nbrp && !op->nbrp->n_gateway_chassis) { -- priority += DROUTE_PRIO; -- } - - struct ds actions = DS_EMPTY_INITIALIZER; - ds_put_format(&actions, "ip.ttl--; "REG_ECMP_GROUP_ID" = 0; %sreg0 = ", -@@ -9187,7 +9145,7 @@ build_lrouter_flows(struct hmap *datapat - nat->logical_ip, - od->l3dgw_port->json_key); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, -- 200, ds_cstr(&match), "next;", -+ 100, ds_cstr(&match), "next;", - &nat->header_); - } - -@@ -9493,15 +9451,6 @@ build_lrouter_flows(struct hmap *datapat - ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); - } - -- /* Logical router ingress table IP_ROUTING - IP routing for distributed -- * logical router -- */ -- HMAP_FOR_EACH (od, key_node, datapaths) { -- if (od->nbr && od->l3dgw_port) { -- add_distributed_routes(lflows, od); -- } -- } -- - /* Logical router ingress table IP_ROUTING & IP_ROUTING_ECMP: IP Routing. 
- * - * A packet that arrives at this table is an IP packet that should be ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -9605,20 +9605,6 @@ AT_CHECK([as hv3 ovs-vsctl set Open_vSwi - OVS_WAIT_UNTIL([test 1 = `as hv3 ovs-vsctl show | \ - grep "Port patch-br-int-to-ln_port" | wc -l`]) - --AT_CHECK([test 1 = `ovn-sbctl dump-flows lr0 | grep lr_in_ip_routing | \ --grep "ip4.src == 10.0.0.3 && is_chassis_resident(\"foo1\")" -c`]) --AT_CHECK([test 1 = `ovn-sbctl dump-flows lr0 | grep lr_in_ip_routing | \ --grep "ip4.src == 10.0.0.4 && is_chassis_resident(\"foo2\")" -c`]) -- --key=`ovn-sbctl --bare --columns tunnel_key list datapath_Binding lr0` --# Check that the OVS flows appear for the dnat_and_snat entries in --# lr_in_ip_routing table. --OVS_WAIT_UNTIL([test 1 = `as hv3 ovs-ofctl dump-flows br-int table=17 | \ --grep "priority=400,ip,metadata=0x$key,nw_src=10.0.0.3" -c`]) -- --OVS_WAIT_UNTIL([test 1 = `as hv3 ovs-ofctl dump-flows br-int table=17 | \ --grep "priority=400,ip,metadata=0x$key,nw_src=10.0.0.4" -c`]) -- - # Re-add nat-addresses option - ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router" - diff --git a/SOURCES/0002-chassis-Fix-chassis_private-record-updates-when-the-.patch b/SOURCES/0002-chassis-Fix-chassis_private-record-updates-when-the-.patch new file mode 100644 index 0000000..b9e4333 --- /dev/null +++ b/SOURCES/0002-chassis-Fix-chassis_private-record-updates-when-the-.patch @@ -0,0 +1,274 @@ +From 59103ff841797ad399e1679bfecfb6256bb6a0c4 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Thu, 3 Sep 2020 17:04:01 +0200 +Subject: [PATCH 2/2] chassis: Fix chassis_private record updates when the + system-id changes. + +Also: +- Change conditional monitoring for Chassis_Private to use the chassis uuid + instead of chassis name. 
Using the chassis->name field does not work + because this is the old value of the field and would cause ovsdb-server + to inform ovn-controller that the updated Chassis_Private record was + "deleted" because it doesn't match the monitor condition anymore. +- Allow ovn-sbctl to access Chassis_Private records by name. + +Reported-at: https://bugzilla.redhat.com/1873032 +Reported-by: Ying Xu +CC: Han Zhou +Fixes: 4adc10f58127 ("Avoid nb_cfg update notification flooding") +Signed-off-by: Dumitru Ceara +Signed-off-by: Numan Siddique +(cherry picked from upstream commit dce1af31b550a9fb57b01cbe0b4139b6768f2521) + +Change-Id: Ic5f7f0b820b43715e1f1cf68b215374daf237fd5 +--- + controller/chassis.c | 92 +++++++++++++++++++++++++++++++++++++++------ + controller/chassis.h | 2 + + controller/ovn-controller.c | 13 ++++--- + tests/ovn-controller.at | 18 +++++++++ + utilities/ovn-sbctl.c | 3 ++ + 5 files changed, 112 insertions(+), 16 deletions(-) + +diff --git a/controller/chassis.c b/controller/chassis.c +index 773d966..8231169 100644 +--- a/controller/chassis.c ++++ b/controller/chassis.c +@@ -633,6 +633,77 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + return true; + } + ++/* ++ * Returns a pointer to a chassis_private record from 'chassis_pvt_table' that ++ * matches the chassis record. ++ */ ++static const struct sbrec_chassis_private * ++chassis_private_get_stale_record( ++ const struct sbrec_chassis_private_table *chassis_pvt_table, ++ const struct sbrec_chassis *chassis) ++{ ++ const struct sbrec_chassis_private *chassis_pvt_rec; ++ ++ SBREC_CHASSIS_PRIVATE_TABLE_FOR_EACH (chassis_pvt_rec, chassis_pvt_table) { ++ if (chassis_pvt_rec->chassis == chassis) { ++ return chassis_pvt_rec; ++ } ++ } ++ ++ return NULL; ++} ++ ++/* If this is a chassis_private config update after we initialized the record ++ * once then we should always be able to find it with the ID we saved in ++ * chassis_state. 
++ * Otherwise (i.e., first time we created the chassis record or if the ++ * system-id changed) then we check if there's a stale record from a previous ++ * controller run that didn't end gracefully and reuse it. If not then we ++ * create a new record. ++ * ++ * Returns the local chassis record. ++ */ ++static const struct sbrec_chassis_private * ++chassis_private_get_record( ++ struct ovsdb_idl_txn *ovnsb_idl_txn, ++ struct ovsdb_idl_index *sbrec_chassis_pvt_by_name, ++ const struct sbrec_chassis_private_table *chassis_pvt_table, ++ const struct sbrec_chassis *chassis) ++{ ++ const struct sbrec_chassis_private *chassis_p = NULL; ++ ++ if (chassis_info_id_inited(&chassis_state)) { ++ chassis_p = ++ chassis_private_lookup_by_name(sbrec_chassis_pvt_by_name, ++ chassis_info_id(&chassis_state)); ++ } ++ ++ if (!chassis_p) { ++ chassis_p = chassis_private_get_stale_record(chassis_pvt_table, ++ chassis); ++ } ++ ++ if (!chassis_p && ovnsb_idl_txn) { ++ return sbrec_chassis_private_insert(ovnsb_idl_txn); ++ } ++ ++ return chassis_p; ++} ++ ++static void ++chassis_private_update(const struct sbrec_chassis_private *chassis_pvt, ++ const struct sbrec_chassis *chassis, ++ const char *chassis_id) ++{ ++ if (!chassis_pvt->name || strcmp(chassis_pvt->name, chassis_id)) { ++ sbrec_chassis_private_set_name(chassis_pvt, chassis_id); ++ } ++ ++ if (chassis_pvt->chassis != chassis) { ++ sbrec_chassis_private_set_chassis(chassis_pvt, chassis); ++ } ++} ++ + /* Returns this chassis's Chassis record, if it is available. 
*/ + const struct sbrec_chassis * + chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, +@@ -640,6 +711,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + struct ovsdb_idl_index *sbrec_chassis_private_by_name, + const struct ovsrec_open_vswitch_table *ovs_table, + const struct sbrec_chassis_table *chassis_table, ++ const struct sbrec_chassis_private_table *chassis_pvt_table, + const char *chassis_id, + const struct ovsrec_bridge *br_int, + const struct sset *transport_zones, +@@ -668,7 +740,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + bool updated = chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg, + chassis_id, transport_zones); + +- chassis_info_set_id(&chassis_state, chassis_id); + if (!existed || updated) { + ovsdb_idl_txn_add_comment(ovnsb_idl_txn, + "ovn-controller: %s chassis '%s'", +@@ -676,17 +747,16 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + chassis_id); + } + +- const struct sbrec_chassis_private *chassis_private_rec = +- chassis_private_lookup_by_name(sbrec_chassis_private_by_name, +- chassis_id); +- if (!chassis_private_rec && ovnsb_idl_txn) { +- chassis_private_rec = sbrec_chassis_private_insert(ovnsb_idl_txn); +- sbrec_chassis_private_set_name(chassis_private_rec, +- chassis_id); +- sbrec_chassis_private_set_chassis(chassis_private_rec, +- chassis_rec); ++ *chassis_private = ++ chassis_private_get_record(ovnsb_idl_txn, ++ sbrec_chassis_private_by_name, ++ chassis_pvt_table, chassis_rec); ++ ++ if (*chassis_private) { ++ chassis_private_update(*chassis_private, chassis_rec, chassis_id); + } +- *chassis_private = chassis_private_rec; ++ ++ chassis_info_set_id(&chassis_state, chassis_id); + } + + ovs_chassis_cfg_destroy(&ovs_cfg); +diff --git a/controller/chassis.h b/controller/chassis.h +index 81055b4..220f726 100644 +--- a/controller/chassis.h ++++ b/controller/chassis.h +@@ -26,6 +26,7 @@ struct ovsrec_bridge; + struct ovsrec_open_vswitch_table; + struct sbrec_chassis; + struct sbrec_chassis_table; ++struct 
sbrec_chassis_private_table; + struct sset; + struct eth_addr; + struct smap; +@@ -37,6 +38,7 @@ const struct sbrec_chassis *chassis_run( + struct ovsdb_idl_index *sbrec_chassis_private_by_name, + const struct ovsrec_open_vswitch_table *, + const struct sbrec_chassis_table *, ++ const struct sbrec_chassis_private_table *, + const char *chassis_id, const struct ovsrec_bridge *br_int, + const struct sset *transport_zones, + const struct sbrec_chassis_private **chassis_private); +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 28ca7a8..6aeeb15 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -179,7 +179,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + * chassis */ + sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "chassisredirect"); + sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "external"); +- if (chassis) { ++ if (chassis && !sbrec_chassis_is_new(chassis)) { + /* This should be mostly redundant with the other clauses for port + * bindings, but it allows us to catch any ports that are assigned to + * us but should not be. That way, we can clear their chassis +@@ -202,9 +202,9 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ, + &chassis->header_.uuid); + +- /* Monitors Chassis_Private record for current chassis only */ +- sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ, +- chassis->name); ++ /* Monitors Chassis_Private record for current chassis only. */ ++ sbrec_chassis_private_add_clause_chassis(&chprv, OVSDB_F_EQ, ++ &chassis->header_.uuid); + } else { + /* During initialization, we monitor all records in Chassis_Private so + * that we don't try to recreate existing ones. 
*/ +@@ -2402,6 +2402,8 @@ main(int argc, char *argv[]) + ovsrec_open_vswitch_table_get(ovs_idl_loop.idl); + const struct sbrec_chassis_table *chassis_table = + sbrec_chassis_table_get(ovnsb_idl_loop.idl); ++ const struct sbrec_chassis_private_table *chassis_pvt_table = ++ sbrec_chassis_private_table_get(ovnsb_idl_loop.idl); + const struct ovsrec_bridge *br_int = + process_br_int(ovs_idl_txn, bridge_table, ovs_table); + const char *chassis_id = get_ovs_chassis_id(ovs_table); +@@ -2410,7 +2412,8 @@ main(int argc, char *argv[]) + if (chassis_id) { + chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name, + sbrec_chassis_private_by_name, +- ovs_table, chassis_table, chassis_id, ++ ovs_table, chassis_table, ++ chassis_pvt_table, chassis_id, + br_int, &transport_zones, + &chassis_private); + } +diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at +index f2faf1f..812946b 100644 +--- a/tests/ovn-controller.at ++++ b/tests/ovn-controller.at +@@ -195,6 +195,15 @@ OVS_WAIT_UNTIL([ + chassis_id=$(ovn-sbctl get Chassis "${sysid}" name) + test "${sysid}" = "${chassis_id}" + ]) ++OVS_WAIT_UNTIL([ ++ chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name) ++ test "${sysid}" = "${chassis_id}" ++]) ++ ++# Only one Chassis_Private record should exist. ++OVS_WAIT_UNTIL([ ++ test $(ovn-sbctl --columns _uuid list chassis_private | wc -l) -eq 1 ++]) + + # Simulate system-id changing while ovn-controller is disconnected from the + # SB. +@@ -212,6 +221,15 @@ OVS_WAIT_UNTIL([ + chassis_id=$(ovn-sbctl get Chassis "${sysid}" name) + test "${sysid}" = "${chassis_id}" + ]) ++OVS_WAIT_UNTIL([ ++ chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name) ++ test "${sysid}" = "${chassis_id}" ++]) ++ ++# Only one Chassis_Private record should exist. 
++OVS_WAIT_UNTIL([ ++ test $(ovn-sbctl --columns _uuid list chassis_private | wc -l) -eq 1 ++]) + + # Gracefully terminate daemons + OVN_CLEANUP_SBOX([hv]) +diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c +index 04e082c..d3eec91 100644 +--- a/utilities/ovn-sbctl.c ++++ b/utilities/ovn-sbctl.c +@@ -1391,6 +1391,9 @@ cmd_set_ssl(struct ctl_context *ctx) + static const struct ctl_table_class tables[SBREC_N_TABLES] = { + [SBREC_TABLE_CHASSIS].row_ids[0] = {&sbrec_chassis_col_name, NULL, NULL}, + ++ [SBREC_TABLE_CHASSIS_PRIVATE].row_ids[0] ++ = {&sbrec_chassis_private_col_name, NULL, NULL}, ++ + [SBREC_TABLE_DATAPATH_BINDING].row_ids + = {{&sbrec_datapath_binding_col_external_ids, "name", NULL}, + {&sbrec_datapath_binding_col_external_ids, "name2", NULL}, +-- +1.8.3.1 + diff --git a/SOURCES/0002-controller-Add-garbage-collector-for-ipv6_prefixd.patch b/SOURCES/0002-controller-Add-garbage-collector-for-ipv6_prefixd.patch deleted file mode 100644 index 7b48c56..0000000 --- a/SOURCES/0002-controller-Add-garbage-collector-for-ipv6_prefixd.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 85b13870020facf5f4441df51df64ee647c6abd9 Mon Sep 17 00:00:00 2001 -Message-Id: <85b13870020facf5f4441df51df64ee647c6abd9.1588608928.git.lorenzo.bianconi@redhat.com> -In-Reply-To: <0b9d16670d5561d8300d2448cbd4686a3acdc57e.1588608928.git.lorenzo.bianconi@redhat.com> -References: <0b9d16670d5561d8300d2448cbd4686a3acdc57e.1588608928.git.lorenzo.bianconi@redhat.com> -From: Lorenzo Bianconi -Date: Wed, 29 Apr 2020 18:05:30 +0200 -Subject: [PATCH 2/3] controller: Add garbage collector for ipv6_prefixd. - -Introduce a garbage collector for stale entries in ipv6_prefixd that are -no longer managed by the controller (e.g. if the processing has been -disabled setting ipv6_prefix_delegation to false on all logical router -ports). 
- -Tested-by: Jianlin Shi -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - controller/pinctrl.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 3230bb386..f0d63b9a6 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -578,6 +578,7 @@ enum { - - struct ipv6_prefixd_state { - long long int next_announce; -+ long long int last_used; - struct in6_addr ipv6_addr; - struct eth_addr ea; - struct eth_addr cmac; -@@ -1128,11 +1129,13 @@ fill_ipv6_prefix_state(struct ovsdb_idl_txn *ovnsb_idl_txn, - sbrec_port_binding_set_options(pb, &options); - smap_destroy(&options); - } -+ pfd->last_used = time_msec(); - } - - return changed; - } - -+#define IPV6_PREFIXD_STALE_TIMEOUT 180000LL - static void - prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn, - struct ovsdb_idl_index *sbrec_port_binding_by_name, -@@ -1210,6 +1213,15 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn, - } - } - -+ struct shash_node *iter, *next; -+ SHASH_FOR_EACH_SAFE (iter, next, &ipv6_prefixd) { -+ struct ipv6_prefixd_state *pfd = iter->data; -+ if (pfd->last_used + IPV6_PREFIXD_STALE_TIMEOUT < time_msec()) { -+ free(pfd); -+ shash_delete(&ipv6_prefixd, iter); -+ } -+ } -+ - if (changed) { - notify_pinctrl_handler(); - } --- -2.26.2 - diff --git a/SOURCES/0002-controller-Add-ipv6-prefix-delegation-state-machine.patch b/SOURCES/0002-controller-Add-ipv6-prefix-delegation-state-machine.patch deleted file mode 100644 index b715219..0000000 --- a/SOURCES/0002-controller-Add-ipv6-prefix-delegation-state-machine.patch +++ /dev/null @@ -1,934 +0,0 @@ -From aa0139f28628bb869866e4c35cb31f8005b99994 Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: <2e84aada0b45d2f8739c2fdbc351098fc1c09c26.1586727203.git.lorenzo.bianconi@redhat.com> -References: <2e84aada0b45d2f8739c2fdbc351098fc1c09c26.1586727203.git.lorenzo.bianconi@redhat.com> -From: Lorenzo Bianconi -Date: Wed, 1 Apr 2020 18:37:30 +0200 
-Subject: [PATCH 2/3] controller: Add ipv6 prefix delegation state machine - -Introduce IPv6 Prefix delegation state machine according to RFC 3633 -https://tools.ietf.org/html/rfc3633. -Add handle_dhcpv6_reply controller action to parse advertise/reply from -IPv6 delegation server. Advertise/reply are parsed running respectively: -- pinctrl_parse_dhcv6_advt -- pinctrl_parse_dhcv6_reply -The IPv6 requesting router starts sending dhcpv6 solicit through the logical -router port marked with ipv6_prefix_delegation set to true. -An IPv6 prefix will be requested for each logical router port marked -with "prefix" set to true in option column of logical router port table. -Save IPv6 prefix received by IPv6 delegation router in the options columns of -SB port binding table in order to be reused by Router Advertisement framework -run by ovn logical router pipeline. -IPv6 Prefix delegation state machine is enabled on Gateway Router or on -a Gateway Router Port - -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - controller/pinctrl.c | 651 ++++++++++++++++++++++++++++++++++++++++++ - include/ovn/actions.h | 8 +- - lib/actions.c | 16 ++ - lib/ovn-l7.h | 19 ++ - ovn-sb.xml | 18 ++ - tests/ovn.at | 4 + - utilities/ovn-trace.c | 2 + - 7 files changed, 717 insertions(+), 1 deletion(-) - -diff --git a/controller/pinctrl.c b/controller/pinctrl.c -index 3fa8923e7..a053938ec 100644 ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -270,6 +270,8 @@ static void pinctrl_ip_mcast_handle( - const struct match *md, - struct ofpbuf *userdata); - -+static void init_ipv6_prefixd(void); -+ - static bool may_inject_pkts(void); - - static void init_put_vport_bindings(void); -@@ -313,6 +315,13 @@ static void pinctrl_compose_ipv6(struct dp_packet *packet, - uint8_t ip_proto, uint8_t ttl, - uint16_t ip_payload_len); - -+static void -+put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, -+ struct ofpbuf *ofpacts); -+ -+static void notify_pinctrl_main(void); 
-+static void notify_pinctrl_handler(void); -+ - COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); - COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); - COVERAGE_DEFINE(pinctrl_drop_controller_event); -@@ -470,6 +479,7 @@ pinctrl_init(void) - init_put_mac_bindings(); - init_send_garps_rarps(); - init_ipv6_ras(); -+ init_ipv6_prefixd(); - init_buffered_packets_map(); - init_event_table(); - ip_mcast_snoop_init(); -@@ -557,6 +567,634 @@ set_actions_and_enqueue_msg(struct rconn *swconn, - ofpbuf_uninit(&ofpacts); - } - -+static struct shash ipv6_prefixd; -+ -+enum { -+ PREFIX_SOLICIT, -+ PREFIX_REQUEST, -+ PREFIX_PENDING, -+ PREFIX_DONE, -+}; -+ -+struct ipv6_prefixd_state { -+ long long int next_announce; -+ struct in6_addr ipv6_addr; -+ struct eth_addr ea; -+ struct eth_addr cmac; -+ int64_t port_key; -+ int64_t metadata; -+ struct in6_addr prefix; -+ uint32_t plife_time; -+ uint32_t vlife_time; -+ uint32_t aid; -+ uint32_t t1; -+ uint32_t t2; -+ uint32_t plen; -+ int state; -+}; -+ -+static void -+init_ipv6_prefixd(void) -+{ -+ shash_init(&ipv6_prefixd); -+} -+ -+static void -+destroy_ipv6_prefixd(void) -+{ -+ struct shash_node *iter, *next; -+ SHASH_FOR_EACH_SAFE (iter, next, &ipv6_prefixd) { -+ struct ipv6_prefixd_state *pfd = iter->data; -+ free(pfd); -+ shash_delete(&ipv6_prefixd, iter); -+ } -+ shash_destroy(&ipv6_prefixd); -+} -+ -+static struct ipv6_prefixd_state * -+pinctrl_find_prefixd_state(const struct flow *ip_flow, unsigned aid) -+{ -+ struct shash_node *iter; -+ -+ SHASH_FOR_EACH (iter, &ipv6_prefixd) { -+ struct ipv6_prefixd_state *pfd = iter->data; -+ if (IN6_ARE_ADDR_EQUAL(&pfd->ipv6_addr, &ip_flow->ipv6_dst) && -+ eth_addr_equals(pfd->ea, ip_flow->dl_dst) && -+ pfd->aid == aid) { -+ return pfd; -+ } -+ } -+ return NULL; -+} -+ -+static void -+pinctrl_parse_dhcpv6_advt(struct rconn *swconn, const struct flow *ip_flow, -+ struct dp_packet *pkt_in, const struct match *md) -+{ -+ struct udp_header *udp_in = dp_packet_l4(pkt_in); -+ size_t dlen = 
MIN(ntohs(udp_in->udp_len), dp_packet_l4_size(pkt_in)); -+ unsigned char *in_dhcpv6_data = (unsigned char *)(udp_in + 1); -+ uint8_t *data, *end = (uint8_t *)udp_in + dlen; -+ int len = 0, aid = 0; -+ -+ data = xmalloc(dlen); -+ /* skip DHCPv6 common header */ -+ in_dhcpv6_data += 4; -+ while (in_dhcpv6_data < end) { -+ struct dhcpv6_opt_header *in_opt = -+ (struct dhcpv6_opt_header *)in_dhcpv6_data; -+ int opt_len = sizeof *in_opt + ntohs(in_opt->len); -+ -+ if (dlen < opt_len + len) { -+ goto out; -+ } -+ -+ switch (ntohs(in_opt->code)) { -+ case DHCPV6_OPT_IA_PD: { -+ struct dhcpv6_opt_ia_na *ia_na = (struct dhcpv6_opt_ia_na *)in_opt; -+ int orig_len = len, hdr_len = 0, size = sizeof *in_opt + 12; -+ -+ aid = ntohl(ia_na->iaid); -+ memcpy(&data[len], in_opt, size); -+ in_opt = (struct dhcpv6_opt_header *)(in_dhcpv6_data + size); -+ len += size; -+ -+ while (size < opt_len) { -+ int flen = sizeof *in_opt + ntohs(in_opt->len); -+ -+ if (dlen < flen + len) { -+ goto out; -+ } -+ -+ if (ntohs(in_opt->code) == DHCPV6_OPT_IA_PREFIX) { -+ memcpy(&data[len], in_opt, flen); -+ hdr_len += flen; -+ len += flen; -+ } -+ if (ntohs(in_opt->code) == DHCPV6_OPT_STATUS_CODE) { -+ struct dhcpv6_opt_status *status; -+ -+ status = (struct dhcpv6_opt_status *)in_opt; -+ if (ntohs(status->status_code)) { -+ goto out; -+ } -+ } -+ size += flen; -+ in_opt = (struct dhcpv6_opt_header *)(in_dhcpv6_data + size); -+ } -+ in_opt = (struct dhcpv6_opt_header *)&data[orig_len]; -+ in_opt->len = htons(hdr_len + 12); -+ break; -+ } -+ case DHCPV6_OPT_SERVER_ID_CODE: -+ case DHCPV6_OPT_CLIENT_ID_CODE: -+ memcpy(&data[len], in_opt, opt_len); -+ len += opt_len; -+ break; -+ default: -+ break; -+ } -+ in_dhcpv6_data += opt_len; -+ } -+ -+ struct ipv6_prefixd_state *pfd = pinctrl_find_prefixd_state(ip_flow, aid); -+ if (!pfd) { -+ goto out; -+ } -+ -+ pfd->state = PREFIX_REQUEST; -+ -+ uint64_t packet_stub[256 / 8]; -+ struct dp_packet packet; -+ -+ dp_packet_use_stub(&packet, packet_stub, sizeof 
packet_stub); -+ eth_compose(&packet, ip_flow->dl_src, ip_flow->dl_dst, ETH_TYPE_IPV6, -+ IPV6_HEADER_LEN); -+ -+ struct udp_header *udp_h = compose_ipv6(&packet, IPPROTO_UDP, -+ &ip_flow->ipv6_dst, -+ &ip_flow->ipv6_src, 0, 0, 255, -+ len + UDP_HEADER_LEN + 4); -+ udp_h->udp_len = htons(len + UDP_HEADER_LEN + 4); -+ udp_h->udp_csum = 0; -+ packet_set_udp_port(&packet, htons(546), htons(547)); -+ -+ unsigned char *dhcp_hdr = (unsigned char *)(udp_h + 1); -+ *dhcp_hdr = DHCPV6_MSG_TYPE_REQUEST; -+ memcpy(dhcp_hdr + 4, data, len); -+ -+ uint32_t csum = packet_csum_pseudoheader6(dp_packet_l3(&packet)); -+ csum = csum_continue(csum, udp_h, dp_packet_size(&packet) - -+ ((const unsigned char *)udp_h - -+ (const unsigned char *)dp_packet_eth(&packet))); -+ udp_h->udp_csum = csum_finish(csum); -+ if (!udp_h->udp_csum) { -+ udp_h->udp_csum = htons(0xffff); -+ } -+ -+ if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) { -+ eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q), -+ ip_flow->vlans[0].tci); -+ } -+ -+ uint64_t ofpacts_stub[4096 / 8]; -+ struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); -+ enum ofp_version version = rconn_get_version(swconn); -+ put_load(ntohll(md->flow.metadata), MFF_LOG_DATAPATH, 0, 64, &ofpacts); -+ put_load(md->flow.regs[MFF_LOG_INPORT - MFF_REG0], MFF_LOG_OUTPORT, -+ 0, 32, &ofpacts); -+ struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); -+ resubmit->in_port = OFPP_CONTROLLER; -+ resubmit->table_id = OFTABLE_REMOTE_OUTPUT; -+ -+ struct ofputil_packet_out po = { -+ .packet = dp_packet_data(&packet), -+ .packet_len = dp_packet_size(&packet), -+ .buffer_id = UINT32_MAX, -+ .ofpacts = ofpacts.data, -+ .ofpacts_len = ofpacts.size, -+ }; -+ match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); -+ enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); -+ queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); -+ dp_packet_uninit(&packet); -+ ofpbuf_uninit(&ofpacts); -+ -+out: -+ free(data); -+} -+ -+static 
void -+pinctrl_prefixd_state_handler(const struct flow *ip_flow, -+ struct in6_addr addr, unsigned aid, -+ char prefix_len, unsigned t1, unsigned t2, -+ unsigned plife_time, unsigned vlife_time) -+{ -+ struct ipv6_prefixd_state *pfd; -+ -+ pfd = pinctrl_find_prefixd_state(ip_flow, aid); -+ if (pfd) { -+ pfd->state = PREFIX_PENDING; -+ pfd->plife_time = plife_time; -+ pfd->vlife_time = vlife_time; -+ pfd->plen = prefix_len; -+ pfd->prefix = addr; -+ pfd->t1 = t1; -+ pfd->t2 = t2; -+ notify_pinctrl_main(); -+ } -+} -+ -+static void -+pinctrl_parse_dhcpv6_reply(struct dp_packet *pkt_in, -+ const struct flow *ip_flow) -+ OVS_REQUIRES(pinctrl_mutex) -+{ -+ struct udp_header *udp_in = dp_packet_l4(pkt_in); -+ unsigned char *in_dhcpv6_data = (unsigned char *)(udp_in + 1); -+ size_t dlen = MIN(ntohs(udp_in->udp_len), dp_packet_l4_size(pkt_in)); -+ unsigned t1 = 0, t2 = 0, vlife_time = 0, plife_time = 0; -+ uint8_t *end = (uint8_t *)udp_in + dlen; -+ uint8_t prefix_len = 0; -+ struct in6_addr ipv6; -+ bool status = false; -+ unsigned aid = 0; -+ -+ memset(&ipv6, 0, sizeof (struct in6_addr)); -+ /* skip DHCPv6 common header */ -+ in_dhcpv6_data += 4; -+ while (in_dhcpv6_data < end) { -+ struct dhcpv6_opt_header *in_opt = -+ (struct dhcpv6_opt_header *)in_dhcpv6_data; -+ int opt_len = sizeof *in_opt + ntohs(in_opt->len); -+ -+ if (in_dhcpv6_data + opt_len > end) { -+ break; -+ } -+ -+ switch (ntohs(in_opt->code)) { -+ case DHCPV6_OPT_IA_PD: { -+ int size = sizeof *in_opt + 12; -+ in_opt = (struct dhcpv6_opt_header *)(in_dhcpv6_data + size); -+ struct dhcpv6_opt_ia_na *ia_na = -+ (struct dhcpv6_opt_ia_na *)in_dhcpv6_data; -+ -+ aid = ntohl(ia_na->iaid); -+ t1 = ntohl(ia_na->t1); -+ t2 = ntohl(ia_na->t2); -+ if (t1 > t2 && t2 > 0) { -+ break; -+ } -+ -+ while (size < opt_len) { -+ if (ntohs(in_opt->code) == DHCPV6_OPT_IA_PREFIX) { -+ struct dhcpv6_opt_ia_prefix *ia_hdr = -+ (struct dhcpv6_opt_ia_prefix *)(in_dhcpv6_data + size); -+ -+ prefix_len = ia_hdr->plen; -+ plife_time = 
ntohl(ia_hdr->plife_time); -+ vlife_time = ntohl(ia_hdr->vlife_time); -+ memcpy(&ipv6, &ia_hdr->ipv6, sizeof (struct in6_addr)); -+ } -+ if (ntohs(in_opt->code) == DHCPV6_OPT_STATUS_CODE) { -+ struct dhcpv6_opt_status *status_hdr; -+ -+ status_hdr = (struct dhcpv6_opt_status *)in_opt; -+ status = ntohs(status_hdr->status_code) == 0; -+ } -+ size += sizeof *in_opt + ntohs(in_opt->len); -+ in_opt = (struct dhcpv6_opt_header *)(in_dhcpv6_data + size); -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ in_dhcpv6_data += opt_len; -+ } -+ if (status) { -+ pinctrl_prefixd_state_handler(ip_flow, ipv6, aid, prefix_len, -+ t1, t2, plife_time, vlife_time); -+ } -+} -+ -+static void -+pinctrl_handle_dhcp6_server(struct rconn *swconn, const struct flow *ip_flow, -+ struct dp_packet *pkt_in, const struct match *md) -+ OVS_REQUIRES(pinctrl_mutex) -+{ -+ if (ip_flow->dl_type != htons(ETH_TYPE_IPV6) || -+ ip_flow->nw_proto != IPPROTO_UDP) { -+ return; -+ } -+ -+ struct udp_header *udp_in = dp_packet_l4(pkt_in); -+ unsigned char *dhcp_hdr = (unsigned char *)(udp_in + 1); -+ -+ switch (*dhcp_hdr) { -+ case DHCPV6_MSG_TYPE_ADVT: -+ pinctrl_parse_dhcpv6_advt(swconn, ip_flow, pkt_in, md); -+ break; -+ case DHCPV6_MSG_TYPE_REPLY: -+ pinctrl_parse_dhcpv6_reply(pkt_in, ip_flow); -+ break; -+ default: -+ break; -+ } -+} -+ -+static void -+compose_prefixd_solicit(struct dp_packet *b, -+ struct ipv6_prefixd_state *pfd, -+ const struct eth_addr eth_dst, -+ const struct in6_addr *ipv6_dst) -+{ -+ eth_compose(b, eth_dst, pfd->ea, ETH_TYPE_IPV6, IPV6_HEADER_LEN); -+ -+ int payload = sizeof(struct dhcpv6_opt_server_id) + -+ sizeof(struct dhcpv6_opt_ia_na); -+ if (ipv6_addr_is_set(&pfd->prefix)) { -+ payload += sizeof(struct dhcpv6_opt_ia_prefix); -+ } -+ int len = UDP_HEADER_LEN + 4 + payload; -+ struct udp_header *udp_h = compose_ipv6(b, IPPROTO_UDP, &pfd->ipv6_addr, -+ ipv6_dst, 0, 0, 255, len); -+ udp_h->udp_len = htons(len); -+ udp_h->udp_csum = 0; -+ packet_set_udp_port(b, htons(546), 
htons(547)); -+ -+ unsigned char *dhcp_hdr = (unsigned char *)(udp_h + 1); -+ *dhcp_hdr = DHCPV6_MSG_TYPE_SOLICIT; -+ -+ struct dhcpv6_opt_server_id *opt_client_id = -+ (struct dhcpv6_opt_server_id *)(dhcp_hdr + 4); -+ opt_client_id->opt.code = htons(DHCPV6_OPT_CLIENT_ID_CODE); -+ opt_client_id->opt.len = htons(sizeof(struct dhcpv6_opt_server_id) - -+ sizeof(struct dhcpv6_opt_header)); -+ opt_client_id->duid_type = htons(DHCPV6_DUID_LL); -+ opt_client_id->hw_type = htons(DHCPV6_HW_TYPE_ETH); -+ opt_client_id->mac = pfd->cmac; -+ -+ if (!ipv6_addr_is_set(&pfd->prefix)) { -+ pfd->aid = random_uint16(); -+ } -+ struct dhcpv6_opt_ia_na *ia_pd = -+ (struct dhcpv6_opt_ia_na *)(opt_client_id + 1); -+ ia_pd->opt.code = htons(DHCPV6_OPT_IA_PD); -+ int opt_len = sizeof(struct dhcpv6_opt_ia_na) - -+ sizeof(struct dhcpv6_opt_header); -+ if (ipv6_addr_is_set(&pfd->prefix)) { -+ opt_len += sizeof(struct dhcpv6_opt_ia_prefix); -+ } -+ ia_pd->opt.len = htons(opt_len); -+ ia_pd->iaid = htonl(pfd->aid); -+ ia_pd->t1 = OVS_BE32_MAX; -+ ia_pd->t2 = OVS_BE32_MAX; -+ if (ipv6_addr_is_set(&pfd->prefix)) { -+ struct dhcpv6_opt_ia_prefix *ia_prefix = -+ (struct dhcpv6_opt_ia_prefix *)(ia_pd + 1); -+ ia_prefix->opt.code = htons(DHCPV6_OPT_IA_PREFIX); -+ ia_prefix->opt.len = htons(sizeof(struct dhcpv6_opt_ia_prefix) - -+ sizeof(struct dhcpv6_opt_header)); -+ ia_prefix->plife_time = OVS_BE32_MAX; -+ ia_prefix->vlife_time = OVS_BE32_MAX; -+ ia_prefix->plen = pfd->plen; -+ ia_prefix->ipv6 = pfd->prefix; -+ } -+ -+ uint32_t csum = packet_csum_pseudoheader6(dp_packet_l3(b)); -+ csum = csum_continue(csum, udp_h, dp_packet_size(b) - -+ ((const unsigned char *)udp_h - -+ (const unsigned char *)dp_packet_eth(b))); -+ udp_h->udp_csum = csum_finish(csum); -+ if (!udp_h->udp_csum) { -+ udp_h->udp_csum = htons(0xffff); -+ } -+} -+ -+#define IPV6_PREFIXD_TIMEOUT 3000LL -+static long long int -+ipv6_prefixd_send(struct rconn *swconn, struct ipv6_prefixd_state *pfd) -+{ -+ long long int cur_time = 
time_msec(); -+ if (cur_time < pfd->next_announce) { -+ return pfd->next_announce; -+ } -+ -+ uint64_t packet_stub[256 / 8]; -+ struct dp_packet packet; -+ -+ struct eth_addr eth_dst; -+ eth_dst = (struct eth_addr) ETH_ADDR_C(33,33,00,01,00,02); -+ struct in6_addr ipv6_dst; -+ ipv6_parse("ff02::1:2", &ipv6_dst); -+ -+ dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); -+ compose_prefixd_solicit(&packet, pfd, eth_dst, &ipv6_dst); -+ -+ uint64_t ofpacts_stub[4096 / 8]; -+ struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); -+ -+ /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ -+ uint32_t dp_key = pfd->metadata; -+ uint32_t port_key = pfd->port_key; -+ put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); -+ put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); -+ put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); -+ struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); -+ resubmit->in_port = OFPP_CONTROLLER; -+ resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; -+ -+ struct ofputil_packet_out po = { -+ .packet = dp_packet_data(&packet), -+ .packet_len = dp_packet_size(&packet), -+ .buffer_id = UINT32_MAX, -+ .ofpacts = ofpacts.data, -+ .ofpacts_len = ofpacts.size, -+ }; -+ -+ match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); -+ enum ofp_version version = rconn_get_version(swconn); -+ enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); -+ queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); -+ dp_packet_uninit(&packet); -+ ofpbuf_uninit(&ofpacts); -+ pfd->next_announce = cur_time + random_range(IPV6_PREFIXD_TIMEOUT); -+ pfd->state = PREFIX_SOLICIT; -+ -+ return pfd->next_announce; -+} -+ -+static bool ipv6_prefixd_should_inject(void) -+{ -+ struct shash_node *iter; -+ -+ SHASH_FOR_EACH (iter, &ipv6_prefixd) { -+ struct ipv6_prefixd_state *pfd = iter->data; -+ if (pfd->state == PREFIX_SOLICIT) { -+ return true; -+ } -+ if (pfd->state && pfd->next_announce < time_msec()) { -+ return true; -+ } -+ } 
-+ return false; -+} -+ -+static void -+ipv6_prefixd_wait(long long int timeout) -+{ -+ if (ipv6_prefixd_should_inject()) { -+ poll_timer_wait_until(timeout); -+ } -+} -+ -+static void -+send_ipv6_prefixd(struct rconn *swconn, long long int *send_prefixd_time) -+ OVS_REQUIRES(pinctrl_mutex) -+{ -+ struct shash_node *iter; -+ -+ *send_prefixd_time = LLONG_MAX; -+ SHASH_FOR_EACH (iter, &ipv6_prefixd) { -+ struct ipv6_prefixd_state *pfd = iter->data; -+ long long int next_msg = ipv6_prefixd_send(swconn, pfd); -+ if (*send_prefixd_time > next_msg) { -+ *send_prefixd_time = next_msg; -+ } -+ } -+} -+ -+static bool -+fill_ipv6_prefix_state(struct ovsdb_idl_txn *ovnsb_idl_txn, -+ const struct local_datapath *ld, -+ struct eth_addr ea, struct in6_addr ipv6_addr, -+ int64_t tunnel_key, int64_t dp_tunnel_key) -+ OVS_REQUIRES(pinctrl_mutex) -+{ -+ bool changed = false; -+ -+ for (size_t i = 0; i < ld->n_peer_ports; i++) { -+ const struct sbrec_port_binding *pb = ld->peer_ports[i].local; -+ struct ipv6_prefixd_state *pfd; -+ -+ if (!smap_get_bool(&pb->options, "ipv6_prefix", false)) { -+ pfd = shash_find_and_delete(&ipv6_prefixd, pb->logical_port); -+ if (pfd) { -+ free(pfd); -+ } -+ continue; -+ } -+ -+ struct lport_addresses c_addrs; -+ for (size_t j = 0; j < pb->n_mac; j++) { -+ if (extract_lsp_addresses(pb->mac[j], &c_addrs)) { -+ break; -+ } -+ } -+ -+ pfd = shash_find_data(&ipv6_prefixd, pb->logical_port); -+ if (!pfd) { -+ pfd = xzalloc(sizeof *pfd); -+ pfd->ipv6_addr = ipv6_addr; -+ pfd->ea = ea; -+ pfd->cmac = c_addrs.ea; -+ pfd->metadata = dp_tunnel_key; -+ pfd->port_key = tunnel_key; -+ shash_add(&ipv6_prefixd, pb->logical_port, pfd); -+ pfd->next_announce = time_msec() + -+ random_range(IPV6_PREFIXD_TIMEOUT); -+ changed = true; -+ -+ char prefix_s[IPV6_SCAN_LEN + 6]; -+ const char *ipv6_pd_list = smap_get(&pb->options, -+ "ipv6_ra_pd_list"); -+ if (!ipv6_pd_list || -+ !ovs_scan(ipv6_pd_list, "%u:"IPV6_SCAN_FMT"/%d", -+ &pfd->aid, prefix_s, &pfd->plen) || -+ 
!ipv6_parse(prefix_s, &pfd->prefix)) { -+ pfd->prefix = in6addr_any; -+ } -+ } else if (pfd->state == PREFIX_PENDING && ovnsb_idl_txn) { -+ char prefix_str[INET6_ADDRSTRLEN + 1] = {}; -+ struct smap options; -+ -+ pfd->state = PREFIX_DONE; -+ pfd->next_announce = time_msec() + pfd->t1 * 1000; -+ ipv6_string_mapped(prefix_str, &pfd->prefix); -+ smap_clone(&options, &pb->options); -+ smap_add_format(&options, "ipv6_ra_pd_list", "%d:%s/%d", -+ pfd->aid, prefix_str, pfd->plen); -+ sbrec_port_binding_set_options(pb, &options); -+ smap_destroy(&options); -+ } -+ } -+ -+ return changed; -+} -+ -+static void -+prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn, -+ struct ovsdb_idl_index *sbrec_port_binding_by_name, -+ const struct hmap *local_datapaths, -+ const struct sbrec_chassis *chassis, -+ const struct sset *active_tunnels) -+ OVS_REQUIRES(pinctrl_mutex) -+{ -+ const struct local_datapath *ld; -+ bool changed = false; -+ -+ HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { -+ if (datapath_is_switch(ld->datapath)) { -+ /* logical switch */ -+ continue; -+ } -+ -+ for (size_t i = 0; i < ld->n_peer_ports; i++) { -+ const struct sbrec_port_binding *pb = ld->peer_ports[i].local; -+ int j; -+ -+ if (!smap_get_bool(&pb->options, "ipv6_prefix_delegation", -+ false)) { -+ continue; -+ } -+ -+ const char *peer_s = smap_get(&pb->options, "peer"); -+ if (!peer_s) { -+ continue; -+ } -+ -+ const struct sbrec_port_binding *peer -+ = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s); -+ if (!peer) { -+ continue; -+ } -+ -+ char *redirect_name = xasprintf("cr-%s", pb->logical_port); -+ bool resident = lport_is_chassis_resident( -+ sbrec_port_binding_by_name, chassis, active_tunnels, -+ redirect_name); -+ free(redirect_name); -+ if (!resident && strcmp(pb->type, "l3gateway")) { -+ continue; -+ } -+ -+ struct in6_addr ip6_addr; -+ struct eth_addr ea; -+ for (j = 0; j < pb->n_mac; j++) { -+ struct lport_addresses laddrs; -+ -+ if (!extract_lsp_addresses(pb->mac[j], 
&laddrs)) { -+ continue; -+ } -+ -+ ea = laddrs.ea; -+ if (laddrs.n_ipv6_addrs > 0) { -+ ip6_addr = laddrs.ipv6_addrs[0].addr; -+ break; -+ } -+ } -+ -+ if (eth_addr_is_zero(ea)) { -+ continue; -+ } -+ -+ if (j == pb->n_mac) { -+ in6_generate_lla(ea, &ip6_addr); -+ } -+ -+ changed |= fill_ipv6_prefix_state(ovnsb_idl_txn, ld, -+ ea, ip6_addr, -+ peer->tunnel_key, -+ peer->datapath->tunnel_key); -+ } -+ } -+ -+ if (changed) { -+ notify_pinctrl_handler(); -+ } -+} -+ - struct buffer_info { - struct ofpbuf ofpacts; - ofp_port_t ofp_port; -@@ -2012,6 +2650,12 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) - pinctrl_handle_bind_vport(&pin.flow_metadata.flow, &userdata); - ovs_mutex_unlock(&pinctrl_mutex); - break; -+ case ACTION_OPCODE_DHCP6_SERVER: -+ ovs_mutex_lock(&pinctrl_mutex); -+ pinctrl_handle_dhcp6_server(swconn, &headers, &packet, -+ &pin.flow_metadata); -+ ovs_mutex_unlock(&pinctrl_mutex); -+ break; - - case ACTION_OPCODE_HANDLE_SVC_CHECK: - ovs_mutex_lock(&pinctrl_mutex); -@@ -2090,6 +2734,7 @@ pinctrl_handler(void *arg_) - /* Next multicast query (IGMP) in ms. 
*/ - static long long int send_mcast_query_time = LLONG_MAX; - static long long int svc_monitors_next_run_time = LLONG_MAX; -+ static long long int send_prefixd_time = LLONG_MAX; - - swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION); - -@@ -2143,6 +2788,7 @@ pinctrl_handler(void *arg_) - ovs_mutex_lock(&pinctrl_mutex); - send_garp_rarp_run(swconn, &send_garp_rarp_time); - send_ipv6_ras(swconn, &send_ipv6_ra_time); -+ send_ipv6_prefixd(swconn, &send_prefixd_time); - send_mac_binding_buffered_pkts(swconn); - ovs_mutex_unlock(&pinctrl_mutex); - -@@ -2160,6 +2806,7 @@ pinctrl_handler(void *arg_) - ipv6_ra_wait(send_ipv6_ra_time); - ip_mcast_querier_wait(send_mcast_query_time); - svc_monitors_wait(svc_monitors_next_run_time); -+ ipv6_prefixd_wait(send_prefixd_time); - - new_seq = seq_read(pinctrl_handler_seq); - seq_wait(pinctrl_handler_seq, new_seq); -@@ -2211,6 +2858,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, - sbrec_port_binding_by_name, br_int, chassis, - local_datapaths, active_tunnels); - prepare_ipv6_ras(local_datapaths); -+ prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name, -+ local_datapaths, chassis, active_tunnels); - sync_dns_cache(dns_table); - controller_event_run(ovnsb_idl_txn, ce_table, chassis); - ip_mcast_sync(ovnsb_idl_txn, chassis, local_datapaths, -@@ -2757,6 +3406,7 @@ pinctrl_destroy(void) - free(pinctrl.br_int_name); - destroy_send_garps_rarps(); - destroy_ipv6_ras(); -+ destroy_ipv6_prefixd(); - destroy_buffered_packets_map(); - event_table_destroy(); - destroy_put_mac_bindings(); -@@ -4471,6 +5121,7 @@ may_inject_pkts(void) - { - return (!shash_is_empty(&ipv6_ras) || - !shash_is_empty(&send_garp_rarp_data) || -+ ipv6_prefixd_should_inject() || - !ovs_list_is_empty(&mcast_query_list) || - !ovs_list_is_empty(&buffered_mac_bindings)); - } -diff --git a/include/ovn/actions.h b/include/ovn/actions.h -index 9b014925b..e3dec99b2 100644 ---- a/include/ovn/actions.h -+++ b/include/ovn/actions.h -@@ -91,7 +91,8 @@ 
struct ovn_extend_table; - OVNACT(TRIGGER_EVENT, ovnact_controller_event) \ - OVNACT(BIND_VPORT, ovnact_bind_vport) \ - OVNACT(HANDLE_SVC_CHECK, ovnact_handle_svc_check) \ -- OVNACT(FWD_GROUP, ovnact_fwd_group) -+ OVNACT(FWD_GROUP, ovnact_fwd_group) \ -+ OVNACT(DHCP6_REPLY, ovnact_null) - - /* enum ovnact_type, with a member OVNACT_ for each action. */ - enum OVS_PACKED_ENUM ovnact_type { -@@ -577,6 +578,11 @@ enum action_opcode { - * MFF_LOG_INPORT = port - */ - ACTION_OPCODE_HANDLE_SVC_CHECK, -+ /* handle_dhcpv6_reply { ...actions ...}." -+ * -+ * The actions, in OpenFlow 1.3 format, follow the action_header. -+ */ -+ ACTION_OPCODE_DHCP6_SERVER, - }; - - /* Header. */ -diff --git a/lib/actions.c b/lib/actions.c -index 6351db765..02141af30 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -2313,6 +2313,20 @@ ovnact_put_opts_free(struct ovnact_put_opts *pdo) - free_gen_options(pdo->options, pdo->n_options); - } - -+static void -+format_DHCP6_REPLY(const struct ovnact_null *a OVS_UNUSED, struct ds *s) -+{ -+ ds_put_cstr(s, "handle_dhcpv6_reply;"); -+} -+ -+static void -+encode_DHCP6_REPLY(const struct ovnact_null *a OVS_UNUSED, -+ const struct ovnact_encode_params *ep OVS_UNUSED, -+ struct ofpbuf *ofpacts) -+{ -+ encode_controller_op(ACTION_OPCODE_DHCP6_SERVER, ofpacts); -+} -+ - static void - parse_SET_QUEUE(struct action_context *ctx) - { -@@ -3260,6 +3274,8 @@ parse_action(struct action_context *ctx) - parse_handle_svc_check(ctx); - } else if (lexer_match_id(ctx->lexer, "fwd_group")) { - parse_fwd_group_action(ctx); -+ } else if (lexer_match_id(ctx->lexer, "handle_dhcpv6_reply")) { -+ ovnact_put_DHCP6_REPLY(ctx->ovnacts); - } else { - lexer_syntax_error(ctx->lexer, "expecting action"); - } -diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h -index 931e6ffcf..cbea2a0c8 100644 ---- a/lib/ovn-l7.h -+++ b/lib/ovn-l7.h -@@ -178,8 +178,11 @@ struct dhcp_opt6_header { - #define DHCPV6_OPT_SERVER_ID_CODE 2 - #define DHCPV6_OPT_IA_NA_CODE 3 - #define DHCPV6_OPT_IA_ADDR_CODE 5 
-+#define DHCPV6_OPT_STATUS_CODE 13 - #define DHCPV6_OPT_DNS_SERVER_CODE 23 - #define DHCPV6_OPT_DOMAIN_SEARCH_CODE 24 -+#define DHCPV6_OPT_IA_PD 25 -+#define DHCPV6_OPT_IA_PREFIX 26 - - #define DHCPV6_OPT_SERVER_ID \ - DHCP_OPTION("server_id", DHCPV6_OPT_SERVER_ID_CODE, "mac") -@@ -258,6 +261,22 @@ struct ovs_nd_route_info { - }; - BUILD_ASSERT_DECL(ND_ROUTE_INFO_OPT_LEN == sizeof(struct ovs_nd_route_info)); - -+OVS_PACKED( -+struct dhcpv6_opt_ia_prefix { -+ struct dhcpv6_opt_header opt; -+ ovs_be32 plife_time; -+ ovs_be32 vlife_time; -+ uint8_t plen; -+ struct in6_addr ipv6; -+}); -+ -+OVS_PACKED( -+struct dhcpv6_opt_status { -+ struct dhcpv6_opt_header opt; -+ ovs_be16 status_code; -+ uint8_t msg[]; -+}); -+ - #define DHCPV6_DUID_LL 3 - #define DHCPV6_HW_TYPE_ETH 1 - -diff --git a/ovn-sb.xml b/ovn-sb.xml -index 3ae9d4f92..72466b97e 100644 ---- a/ovn-sb.xml -+++ b/ovn-sb.xml -@@ -2149,6 +2149,17 @@ tcp.flags = RST; -

            Example: handle_svc_check(inport);

            - - -+
            handle_dhcpv6_reply;
            -+
            -+

            -+ Handle DHCPv6 prefix delegation advertisements/replies from -+ a IPv6 delegation server. ovn-controller will -+ add an entry ipv6_ra_pd_list in the -+ table for each -+ prefix received from the delegation server -+

            -+
            -+ -
            R = select(N1[=W1], N2[=W2], ...);
            -
            -

            -@@ -2182,6 +2193,13 @@ tcp.flags = RST; -

            -
            - -+
            handle_dhcpv6_reply;
            -+
            -+

            -+ This action is used to parse DHCPv6 replies from IPv6 -+ Delegation Router and managed IPv6 Prefix delegation state machine -+

            -+
            - - - -diff --git a/tests/ovn.at b/tests/ovn.at -index e8554f60d..3bc435e6d 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -1525,6 +1525,10 @@ fwd_group(); - fwd_group(liveness="false", childports="eth0", "lsp1"); - Syntax error at `"false"' expecting `,'. - -+# prefix delegation -+handle_dhcpv6_reply; -+ encodes as controller(userdata=00.00.00.13.00.00.00.00) -+ - # Miscellaneous negative tests. - ; - Syntax error at `;'. -diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c -index 84e5f2b5c..e59698ec4 100644 ---- a/utilities/ovn-trace.c -+++ b/utilities/ovn-trace.c -@@ -2292,6 +2292,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, - - case OVNACT_FWD_GROUP: - break; -+ case OVNACT_DHCP6_REPLY: -+ break; - } - } - ds_destroy(&s); --- -2.25.2 - diff --git a/SOURCES/0002-ovn-controller-Persist-the-conjunction-ids-allocated.patch b/SOURCES/0002-ovn-controller-Persist-the-conjunction-ids-allocated.patch new file mode 100644 index 0000000..6410d70 --- /dev/null +++ b/SOURCES/0002-ovn-controller-Persist-the-conjunction-ids-allocated.patch @@ -0,0 +1,745 @@ +From dc36d5ee7b510e9b258037be454460dea53c00a5 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Tue, 18 Aug 2020 20:16:07 +0530 +Subject: [PATCH 2/2] ovn-controller: Persist the conjunction ids allocated for + conjuctive matches. + +For a logical flow which results in conjuctive OF matches, we are not persisting +the allocated conjunction ids for it. There are few side effects because of this. + - When a port group or address set gets modified, the logical flows which references + these port groups/address sets gets reprocessed again and the resulting OpenvSwitch flows + with conjunctive matches gets modified in the vswitchd if the conjunction id changes. + + - And because of this there is small probability of a packet getting dropped when the + OF flows gets updated with different conjunction ids. + +This patch fixes this issue by persisting the conjunction ids. 
Earlier, logical flow caching +support was added [1] to ovn-controller and then reverted [2] later due to some issues. + +This patch takes the lflow caching approach to persist the conjunction ids. But it only +creates the cache for logical flows which result in conjunctive matches. And it doesn't +cache the expr tree. + +The lflow caching is made configurable and enabled by default. Any user can disable caching +by setting 'ovn-enable-lflow-cache' to 'false' in the OVS db. + - ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false + +Note: Changing the option 'ovn-enable-lflow-cache' doesn't result in a full recompute of +the I-P engine. But ovn-controller updates the chassis.other_config column. And when it +receives the update2/update3 message, this results in full recompute (as any chassis +change results in full recompute). This is the case with all the config options in OVS db. + +An upcoming patch series will attempt to add back the expr caching (addressing all the issues). + +[1] - 8795bec737b("ovn-controller: Cache logical flow expr tree for each lflow.") 
+[2] - 065bcf46218("ovn-controller: Revert lflow expr caching") + +Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1858878 +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit 2662498bfd13ccb13defd1c800f449be7e271abe) +Conflicts: + controller/lflow.c + +Change-Id: I8a8de80e6bc3f7d9935266fbf466c2aab3e30c8c +--- + controller/chassis.c | 22 +++++- + controller/lflow.c | 118 ++++++++++++++++++++++++++++++- + controller/lflow.h | 8 ++- + controller/ovn-controller.c | 97 +++++++++++++++++++++++--- + tests/ovn.at | 135 ++++++++++++++++++++++++++++++++++++ + 5 files changed, 363 insertions(+), 17 deletions(-) + +diff --git a/controller/chassis.c b/controller/chassis.c +index 823116926..e77fb0849 100644 +--- a/controller/chassis.c ++++ b/controller/chassis.c +@@ -86,6 +86,7 @@ struct ovs_chassis_cfg { + const char *cms_options; + const char *monitor_all; + const char *chassis_macs; ++ const char *enable_lflow_cache; + + /* Set of encap types parsed from the 'ovn-encap-type' external-id. 
*/ + struct sset encap_type_set; +@@ -165,6 +166,12 @@ get_monitor_all(const struct smap *ext_ids) + return smap_get_def(ext_ids, "ovn-monitor-all", "false"); + } + ++static const char * ++get_enable_lflow_cache(const struct smap *ext_ids) ++{ ++ return smap_get_def(ext_ids, "ovn-enable-lflow-cache", "true"); ++} ++ + static const char * + get_encap_csum(const struct smap *ext_ids) + { +@@ -284,6 +291,7 @@ chassis_parse_ovs_config(const struct ovsrec_open_vswitch_table *ovs_table, + ovs_cfg->cms_options = get_cms_options(&cfg->external_ids); + ovs_cfg->monitor_all = get_monitor_all(&cfg->external_ids); + ovs_cfg->chassis_macs = get_chassis_mac_mappings(&cfg->external_ids); ++ ovs_cfg->enable_lflow_cache = get_enable_lflow_cache(&cfg->external_ids); + + if (!chassis_parse_ovs_encap_type(encap_type, &ovs_cfg->encap_type_set)) { + return false; +@@ -310,12 +318,14 @@ static void + chassis_build_other_config(struct smap *config, const char *bridge_mappings, + const char *datapath_type, const char *cms_options, + const char *monitor_all, const char *chassis_macs, +- const char *iface_types, bool is_interconn) ++ const char *iface_types, ++ const char *enable_lflow_cache, bool is_interconn) + { + smap_replace(config, "ovn-bridge-mappings", bridge_mappings); + smap_replace(config, "datapath-type", datapath_type); + smap_replace(config, "ovn-cms-options", cms_options); + smap_replace(config, "ovn-monitor-all", monitor_all); ++ smap_replace(config, "ovn-enable-lflow-cache", enable_lflow_cache); + smap_replace(config, "iface-types", iface_types); + smap_replace(config, "ovn-chassis-mac-mappings", chassis_macs); + smap_replace(config, "is-interconn", is_interconn ? 
"true" : "false"); +@@ -330,6 +340,7 @@ chassis_other_config_changed(const char *bridge_mappings, + const char *cms_options, + const char *monitor_all, + const char *chassis_macs, ++ const char *enable_lflow_cache, + const struct ds *iface_types, + bool is_interconn, + const struct sbrec_chassis *chassis_rec) +@@ -362,6 +373,13 @@ chassis_other_config_changed(const char *bridge_mappings, + return true; + } + ++ const char *chassis_enable_lflow_cache = ++ get_enable_lflow_cache(&chassis_rec->other_config); ++ ++ if (strcmp(enable_lflow_cache, chassis_enable_lflow_cache)) { ++ return true; ++ } ++ + const char *chassis_mac_mappings = + get_chassis_mac_mappings(&chassis_rec->other_config); + if (strcmp(chassis_macs, chassis_mac_mappings)) { +@@ -586,6 +604,7 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + ovs_cfg->cms_options, + ovs_cfg->monitor_all, + ovs_cfg->chassis_macs, ++ ovs_cfg->enable_lflow_cache, + &ovs_cfg->iface_types, + ovs_cfg->is_interconn, + chassis_rec)) { +@@ -598,6 +617,7 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + ovs_cfg->monitor_all, + ovs_cfg->chassis_macs, + ds_cstr_ro(&ovs_cfg->iface_types), ++ ovs_cfg->enable_lflow_cache, + ovs_cfg->is_interconn); + sbrec_chassis_verify_other_config(chassis_rec); + sbrec_chassis_set_other_config(chassis_rec, &other_config); +diff --git a/controller/lflow.c b/controller/lflow.c +index 151561210..c2f939f90 100644 +--- a/controller/lflow.c ++++ b/controller/lflow.c +@@ -269,6 +269,70 @@ lflow_resource_destroy_lflow(struct lflow_resource_ref *lfrr, + free(lfrn); + } + ++/* Represents an lflow cache which ++ * - stores the conjunction id offset if the lflow matches ++ * results in conjunctive OpenvSwitch flows. 
++ */ ++struct lflow_cache { ++ struct hmap_node node; ++ struct uuid lflow_uuid; /* key */ ++ uint32_t conj_id_ofs; ++}; ++ ++static struct lflow_cache * ++lflow_cache_add(struct hmap *lflow_cache_map, ++ const struct sbrec_logical_flow *lflow) ++{ ++ struct lflow_cache *lc = xmalloc(sizeof *lc); ++ lc->lflow_uuid = lflow->header_.uuid; ++ lc->conj_id_ofs = 0; ++ hmap_insert(lflow_cache_map, &lc->node, uuid_hash(&lc->lflow_uuid)); ++ return lc; ++} ++ ++static struct lflow_cache * ++lflow_cache_get(struct hmap *lflow_cache_map, ++ const struct sbrec_logical_flow *lflow) ++{ ++ struct lflow_cache *lc; ++ size_t hash = uuid_hash(&lflow->header_.uuid); ++ HMAP_FOR_EACH_WITH_HASH (lc, node, hash, lflow_cache_map) { ++ if (uuid_equals(&lc->lflow_uuid, &lflow->header_.uuid)) { ++ return lc; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void ++lflow_cache_delete(struct hmap *lflow_cache_map, ++ const struct sbrec_logical_flow *lflow) ++{ ++ struct lflow_cache *lc = lflow_cache_get(lflow_cache_map, lflow); ++ if (lc) { ++ hmap_remove(lflow_cache_map, &lc->node); ++ free(lc); ++ } ++} ++ ++void ++lflow_cache_init(struct hmap *lflow_cache_map) ++{ ++ hmap_init(lflow_cache_map); ++} ++ ++void ++lflow_cache_destroy(struct hmap *lflow_cache_map) ++{ ++ struct lflow_cache *lc; ++ HMAP_FOR_EACH_POP (lc, node, lflow_cache_map) { ++ free(lc); ++ } ++ ++ hmap_destroy(lflow_cache_map); ++} ++ + /* Adds the logical flows from the Logical_Flow table to flow tables. */ + static void + add_logical_flows(struct lflow_ctx_in *l_ctx_in, +@@ -306,6 +370,7 @@ add_logical_flows(struct lflow_ctx_in *l_ctx_in, + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_ERR_RL(&rl, "Conjunction id overflow when processing lflow " + UUID_FMT, UUID_ARGS(&lflow->header_.uuid)); ++ l_ctx_out->conj_id_overflow = true; + } + } + +@@ -355,6 +420,9 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in, + /* Delete entries from lflow resource reference. 
*/ + lflow_resource_destroy_lflow(l_ctx_out->lfrr, + &lflow->header_.uuid); ++ if (l_ctx_out->lflow_cache_map) { ++ lflow_cache_delete(l_ctx_out->lflow_cache_map, lflow); ++ } + } + } + +@@ -382,6 +450,7 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in, + &nd_ra_opts, &controller_event_opts, + l_ctx_in, l_ctx_out)) { + ret = false; ++ l_ctx_out->conj_id_overflow = true; + break; + } + } +@@ -467,6 +536,7 @@ lflow_handle_changed_ref(enum ref_type ref_type, const char *ref_name, + &nd_ra_opts, &controller_event_opts, + l_ctx_in, l_ctx_out)) { + ret = false; ++ l_ctx_out->conj_id_overflow = true; + break; + } + *changed = true; +@@ -652,13 +722,41 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow, + ovnacts_free(ovnacts.data, ovnacts.size); + ofpbuf_uninit(&ovnacts); + ++ uint32_t conj_id_ofs = *l_ctx_out->conj_id_ofs; ++ if (n_conjs) { ++ if (l_ctx_out->lflow_cache_map) { ++ struct lflow_cache *lc = ++ lflow_cache_get(l_ctx_out->lflow_cache_map, lflow); ++ if (!lc) { ++ lc = lflow_cache_add(l_ctx_out->lflow_cache_map, lflow); ++ } ++ ++ if (!lc->conj_id_ofs) { ++ lc->conj_id_ofs = *l_ctx_out->conj_id_ofs; ++ if (!update_conj_id_ofs(l_ctx_out->conj_id_ofs, n_conjs)) { ++ lc->conj_id_ofs = 0; ++ expr_matches_destroy(&matches); ++ return false; ++ } ++ } ++ ++ conj_id_ofs = lc->conj_id_ofs; ++ } else { ++ /* lflow caching is disabled. */ ++ if (!update_conj_id_ofs(l_ctx_out->conj_id_ofs, n_conjs)) { ++ expr_matches_destroy(&matches); ++ return false; ++ } ++ } ++ } ++ + /* Prepare the OpenFlow matches for adding to the flow table. 
*/ + struct expr_match *m; + HMAP_FOR_EACH (m, hmap_node, &matches) { + match_set_metadata(&m->match, + htonll(lflow->logical_datapath->tunnel_key)); + if (m->match.wc.masks.conj_id) { +- m->match.flow.conj_id += *l_ctx_out->conj_id_ofs; ++ m->match.flow.conj_id += conj_id_ofs; + } + if (datapath_is_switch(ldp)) { + unsigned int reg_index +@@ -693,7 +791,7 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow, + struct ofpact_conjunction *dst; + + dst = ofpact_put_CONJUNCTION(&conj); +- dst->id = src->id + *l_ctx_out->conj_id_ofs; ++ dst->id = src->id + conj_id_ofs; + dst->clause = src->clause; + dst->n_clauses = src->n_clauses; + } +@@ -708,7 +806,7 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow, + /* Clean up. */ + expr_matches_destroy(&matches); + ofpbuf_uninit(&ofpacts); +- return update_conj_id_ofs(l_ctx_out->conj_id_ofs, n_conjs); ++ return true; + } + + static void +@@ -858,6 +956,19 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out) + { + COVERAGE_INC(lflow_run); + ++ /* when lflow_run is called, it's possible that some of the logical flows ++ * are deleted. We need to delete the lflow cache for these ++ * lflows (if present), otherwise, they will not be deleted at all. 
*/ ++ if (l_ctx_out->lflow_cache_map) { ++ const struct sbrec_logical_flow *lflow; ++ SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow, ++ l_ctx_in->logical_flow_table) { ++ if (sbrec_logical_flow_is_deleted(lflow)) { ++ lflow_cache_delete(l_ctx_out->lflow_cache_map, lflow); ++ } ++ } ++ } ++ + add_logical_flows(l_ctx_in, l_ctx_out); + add_neighbor_flows(l_ctx_in->sbrec_port_binding_by_name, + l_ctx_in->mac_binding_table, l_ctx_in->local_datapaths, +@@ -914,6 +1025,7 @@ lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *dp, + &nd_ra_opts, &controller_event_opts, + l_ctx_in, l_ctx_out)) { + handled = false; ++ l_ctx_out->conj_id_overflow = true; + break; + } + } +diff --git a/controller/lflow.h b/controller/lflow.h +index ae02eaf5e..c66b318e9 100644 +--- a/controller/lflow.h ++++ b/controller/lflow.h +@@ -141,12 +141,13 @@ struct lflow_ctx_out { + struct ovn_extend_table *group_table; + struct ovn_extend_table *meter_table; + struct lflow_resource_ref *lfrr; +- struct hmap *lflow_expr_cache; ++ struct hmap *lflow_cache_map; + uint32_t *conj_id_ofs; ++ bool conj_id_overflow; + }; + + void lflow_init(void); +-void lflow_run(struct lflow_ctx_in *, struct lflow_ctx_out *); ++void lflow_run(struct lflow_ctx_in *, struct lflow_ctx_out *); + bool lflow_handle_changed_flows(struct lflow_ctx_in *, struct lflow_ctx_out *); + bool lflow_handle_changed_ref(enum ref_type, const char *ref_name, + struct lflow_ctx_in *, struct lflow_ctx_out *, +@@ -159,7 +160,8 @@ void lflow_handle_changed_neighbors( + + void lflow_destroy(void); + +-void lflow_expr_destroy(struct hmap *lflow_expr_cache); ++void lflow_cache_init(struct hmap *); ++void lflow_cache_destroy(struct hmap *); + + bool lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *, + struct lflow_ctx_in *, +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 6aeeb15f4..3531b187f 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -76,6 +76,7 @@ 
static unixctl_cb_func cluster_state_reset_cmd; + static unixctl_cb_func debug_pause_execution; + static unixctl_cb_func debug_resume_execution; + static unixctl_cb_func debug_status_execution; ++static unixctl_cb_func flush_lflow_cache; + + #define DEFAULT_BRIDGE_NAME "br-int" + #define DEFAULT_PROBE_INTERVAL_MSEC 5000 +@@ -462,7 +463,8 @@ get_ofctrl_probe_interval(struct ovsdb_idl *ovs_idl) + * updates 'sbdb_idl' with that pointer. */ + static void + update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, +- bool *monitor_all_p, bool *reset_ovnsb_idl_min_index) ++ bool *monitor_all_p, bool *reset_ovnsb_idl_min_index, ++ bool *enable_lflow_cache) + { + const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl); + if (!cfg) { +@@ -498,6 +500,11 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, + ovsdb_idl_reset_min_index(ovnsb_idl); + *reset_ovnsb_idl_min_index = false; + } ++ ++ if (enable_lflow_cache != NULL) { ++ *enable_lflow_cache = ++ smap_get_bool(&cfg->external_ids, "ovn-enable-lflow-cache", true); ++ } + } + + static void +@@ -788,6 +795,10 @@ enum ovs_engine_node { + OVS_NODES + #undef OVS_NODE + ++struct controller_engine_ctx { ++ bool enable_lflow_cache; ++}; ++ + struct ed_type_ofctrl_is_connected { + bool connected; + }; +@@ -1519,6 +1530,12 @@ physical_flow_changes_ovs_iface_handler(struct engine_node *node, void *data) + return true; + } + ++struct flow_output_persistent_data { ++ uint32_t conj_id_ofs; ++ struct hmap lflow_cache_map; ++ bool lflow_cache_enabled; ++}; ++ + struct ed_type_flow_output { + /* desired flows */ + struct ovn_desired_flow_table flow_table; +@@ -1526,10 +1543,12 @@ struct ed_type_flow_output { + struct ovn_extend_table group_table; + /* meter ids for QoS */ + struct ovn_extend_table meter_table; +- /* conjunction id offset */ +- uint32_t conj_id_ofs; + /* lflow resource cross reference */ + struct lflow_resource_ref lflow_resource_ref; ++ ++ /* Data which is persistent and 
not cleared during ++ * full recompute. */ ++ struct flow_output_persistent_data pd; + }; + + static void init_physical_ctx(struct engine_node *node, +@@ -1680,7 +1699,13 @@ static void init_lflow_ctx(struct engine_node *node, + l_ctx_out->group_table = &fo->group_table; + l_ctx_out->meter_table = &fo->meter_table; + l_ctx_out->lfrr = &fo->lflow_resource_ref; +- l_ctx_out->conj_id_ofs = &fo->conj_id_ofs; ++ l_ctx_out->conj_id_ofs = &fo->pd.conj_id_ofs; ++ if (fo->pd.lflow_cache_enabled) { ++ l_ctx_out->lflow_cache_map = &fo->pd.lflow_cache_map; ++ } else { ++ l_ctx_out->lflow_cache_map = NULL; ++ } ++ l_ctx_out->conj_id_overflow = false; + } + + static void * +@@ -1692,8 +1717,10 @@ en_flow_output_init(struct engine_node *node OVS_UNUSED, + ovn_desired_flow_table_init(&data->flow_table); + ovn_extend_table_init(&data->group_table); + ovn_extend_table_init(&data->meter_table); +- data->conj_id_ofs = 1; ++ data->pd.conj_id_ofs = 1; + lflow_resource_init(&data->lflow_resource_ref); ++ lflow_cache_init(&data->pd.lflow_cache_map); ++ data->pd.lflow_cache_enabled = true; + return data; + } + +@@ -1705,6 +1732,7 @@ en_flow_output_cleanup(void *data) + ovn_extend_table_destroy(&flow_output_data->group_table); + ovn_extend_table_destroy(&flow_output_data->meter_table); + lflow_resource_destroy(&flow_output_data->lflow_resource_ref); ++ lflow_cache_destroy(&flow_output_data->pd.lflow_cache_map); + } + + static void +@@ -1738,7 +1766,6 @@ en_flow_output_run(struct engine_node *node, void *data) + struct ovn_desired_flow_table *flow_table = &fo->flow_table; + struct ovn_extend_table *group_table = &fo->group_table; + struct ovn_extend_table *meter_table = &fo->meter_table; +- uint32_t *conj_id_ofs = &fo->conj_id_ofs; + struct lflow_resource_ref *lfrr = &fo->lflow_resource_ref; + + static bool first_run = true; +@@ -1751,12 +1778,39 @@ en_flow_output_run(struct engine_node *node, void *data) + lflow_resource_clear(lfrr); + } + +- *conj_id_ofs = 1; ++ struct 
controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx; ++ if (fo->pd.lflow_cache_enabled && !ctrl_ctx->enable_lflow_cache) { ++ lflow_cache_destroy(&fo->pd.lflow_cache_map); ++ lflow_cache_init(&fo->pd.lflow_cache_map); ++ } ++ fo->pd.lflow_cache_enabled = ctrl_ctx->enable_lflow_cache; ++ ++ if (!fo->pd.lflow_cache_enabled) { ++ fo->pd.conj_id_ofs = 1; ++ } ++ + struct lflow_ctx_in l_ctx_in; + struct lflow_ctx_out l_ctx_out; + init_lflow_ctx(node, rt_data, fo, &l_ctx_in, &l_ctx_out); + lflow_run(&l_ctx_in, &l_ctx_out); + ++ if (l_ctx_out.conj_id_overflow) { ++ /* Conjunction ids overflow. There can be many holes in between. ++ * Destroy lflow cache and call lflow_run() again. */ ++ ovn_desired_flow_table_clear(flow_table); ++ ovn_extend_table_clear(group_table, false /* desired */); ++ ovn_extend_table_clear(meter_table, false /* desired */); ++ lflow_resource_clear(lfrr); ++ fo->pd.conj_id_ofs = 1; ++ lflow_cache_destroy(&fo->pd.lflow_cache_map); ++ lflow_cache_init(&fo->pd.lflow_cache_map); ++ l_ctx_out.conj_id_overflow = false; ++ lflow_run(&l_ctx_in, &l_ctx_out); ++ if (l_ctx_out.conj_id_overflow) { ++ VLOG_WARN("Conjunction id overflow."); ++ } ++ } ++ + struct physical_ctx p_ctx; + init_physical_ctx(node, rt_data, &p_ctx); + +@@ -2321,6 +2375,8 @@ main(int argc, char *argv[]) + + unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd, + NULL); ++ unixctl_command_register("flush-lflow-cache", "", 0, 0, flush_lflow_cache, ++ &flow_output_data->pd); + + bool reset_ovnsb_idl_min_index = false; + unixctl_command_register("sb-cluster-state-reset", "", 0, 0, +@@ -2338,6 +2394,10 @@ main(int argc, char *argv[]) + unsigned int ovs_cond_seqno = UINT_MAX; + unsigned int ovnsb_cond_seqno = UINT_MAX; + ++ struct controller_engine_ctx ctrl_engine_ctx = { ++ .enable_lflow_cache = true ++ }; ++ + /* Main loop. 
*/ + exiting = false; + restart = false; +@@ -2366,7 +2426,8 @@ main(int argc, char *argv[]) + } + + update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, &sb_monitor_all, +- &reset_ovnsb_idl_min_index); ++ &reset_ovnsb_idl_min_index, ++ &ctrl_engine_ctx.enable_lflow_cache); + update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); + ofctrl_set_probe_interval(get_ofctrl_probe_interval(ovs_idl_loop.idl)); + +@@ -2384,7 +2445,8 @@ main(int argc, char *argv[]) + + struct engine_context eng_ctx = { + .ovs_idl_txn = ovs_idl_txn, +- .ovnsb_idl_txn = ovnsb_idl_txn ++ .ovnsb_idl_txn = ovnsb_idl_txn, ++ .client_ctx = &ctrl_engine_ctx + }; + + engine_set_context(&eng_ctx); +@@ -2624,7 +2686,8 @@ loop_done: + if (!restart) { + bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl); + while (!done) { +- update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, NULL, NULL); ++ update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, ++ NULL, NULL, NULL); + update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); + + struct ovsdb_idl_txn *ovs_idl_txn +@@ -2854,6 +2917,20 @@ engine_recompute_cmd(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, + unixctl_command_reply(conn, NULL); + } + ++static void ++flush_lflow_cache(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *arg_) ++{ ++ VLOG_INFO("User triggered lflow cache flush."); ++ struct flow_output_persistent_data *fo_pd = arg_; ++ lflow_cache_destroy(&fo_pd->lflow_cache_map); ++ lflow_cache_init(&fo_pd->lflow_cache_map); ++ fo_pd->conj_id_ofs = 1; ++ engine_set_force_recompute(true); ++ poll_immediate_wake(); ++ unixctl_command_reply(conn, NULL); ++} ++ + static void + cluster_state_reset_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *idl_reset_) +diff --git a/tests/ovn.at b/tests/ovn.at +index 7cc1756e1..5a1e59df5 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -21402,6 +21402,141 @@ OVN_CLEANUP([hv1],[hv2]) + + AT_CLEANUP + 
++AT_SETUP([ovn -- lflow cache for conjunctions]) ++ovn_start ++net_add n1 ++sim_add hv1 ++ ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++ovn-nbctl ls-add sw0 ++ovn-nbctl lsp-add sw0 sw0-p1 ++ovn-nbctl lsp-set-addresses sw0-p1 "10:14:00:00:00:03 10.0.0.3" ++ovn-nbctl lsp-set-port-security sw0-p1 "10:14:00:00:00:03 10.0.0.3" ++ ++ovn-nbctl lsp-add sw0 sw0-p2 ++ovn-nbctl lsp-set-addresses sw0-p2 "10:14:00:00:00:04 10.0.0.4" ++ovn-nbctl lsp-set-port-security sw0-p2 "10:14:00:00:00:04 10.0.0.4" ++ ++ovn-nbctl lsp-add sw0 sw0-p3 ++ovn-nbctl lsp-set-addresses sw0-p3 "10:14:00:00:00:05 10.0.0.5" ++ovn-nbctl lsp-set-port-security sw0-p3 "10:14:00:00:00:05 10.0.0.5" ++ ++ovn-nbctl lsp-add sw0 sw0-p4 ++ovn-nbctl lsp-set-addresses sw0-p4 "10:14:00:00:00:06 10.0.0.6" ++ovn-nbctl lsp-set-port-security sw0-p4 "10:14:00:00:00:06 10.0.0.6" ++ ++as hv1 ++ovs-vsctl -- add-port br-int hv1-vif1 -- \ ++ set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ ++ options:tx_pcap=hv1/vif1-tx.pcap \ ++ options:rxq_pcap=hv1/vif1-rx.pcap \ ++ ofport-request=1 ++ovs-vsctl -- add-port br-int hv1-vif2 -- \ ++ set interface hv1-vif2 external-ids:iface-id=sw0-p2 \ ++ options:tx_pcap=hv1/vif2-tx.pcap \ ++ options:rxq_pcap=hv1/vif2-rx.pcap \ ++ ofport-request=2 ++ovs-vsctl -- add-port br-int hv1-vif3 -- \ ++ set interface hv1-vif3 external-ids:iface-id=sw0-p3 \ ++ options:tx_pcap=hv1/vif3-tx.pcap \ ++ options:rxq_pcap=hv1/vif3-rx.pcap \ ++ ofport-request=3 ++ovs-vsctl -- add-port br-int hv1-vif4 -- \ ++ set interface hv1-vif4 external-ids:iface-id=sw0-p4 \ ++ options:tx_pcap=hv1/vif4-tx.pcap \ ++ options:rxq_pcap=hv1/vif4-rx.pcap \ ++ ofport-request=4 ++ ++OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p1) = xup]) ++OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p2) = xup]) ++OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p3) = xup]) ++OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up sw0-p4) = xup]) ++ ++ovn-nbctl pg-add pg0 sw0-p1 sw0-p2 ++ovn-nbctl acl-add pg0 
to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow ++ovn-nbctl --wait=hv sync ++ ++OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=2")]) ++ ++# Add sw0-p3 to the port group pg0. The conj_id should be 2. ++ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 ++OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=2")]) ++ ++# Add sw0p4 to the port group pg0. The conj_id should be 2. ++ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 sw0-p4 ++OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=2")]) ++ ++# Add another ACL with conjunction. ++ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow ++OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep tcp | grep -c "conj_id=2")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep udp | grep -c "conj_id=3")]) ++ ++# Delete tcp ACL. ++ovn-nbctl acl-del pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" ++OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep udp | grep -c "conj_id=3")]) ++ ++# Add back the tcp ACL. 
++ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow ++OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep udp | grep -c "conj_id=3")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep tcp | grep -c "conj_id=4")]) ++ ++ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && inport == @pg0 && ip4 && tcp.dst >= 84 && tcp.dst <= 86" allow ++OVS_WAIT_UNTIL([test 3 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep udp | grep -c "conj_id=3")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep tcp | grep -c "conj_id=4")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep tcp | grep -c "conj_id=5")]) ++ ++ovn-nbctl clear port_group pg0 acls ++OVS_WAIT_UNTIL([test 0 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++ ++ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && tcp.dst >= 80 && tcp.dst <= 82" allow ++ovn-nbctl --wait=hv acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= 80 && udp.dst <= 82" allow ++OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep tcp | grep -c "conj_id=6")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep udp | grep -c "conj_id=7")]) ++ ++# Flush the lflow cache. ++as hv1 ovn-appctl -t ovn-controller flush-lflow-cache ++OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=2")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=3")]) ++ ++# Disable lflow caching. 
++ ++as hv1 ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false ++ ++# Wait until ovn-enable-lflow-cache is processed by ovn-controller. ++OVS_WAIT_UNTIL([ ++ test $(ovn-sbctl get chassis hv1 other_config:ovn-enable-lflow-cache) = '"false"' ++]) ++ ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=2")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=3")]) ++ ++# Remove port sw0-p4 from port group. ++ovn-nbctl pg-set-ports pg0 sw0-p1 sw0-p2 sw0-p3 ++OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=4")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=5")]) ++ ++as hv1 ovn-appctl -t ovn-controller recompute ++ ++OVS_WAIT_UNTIL([test 2 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id")]) ++OVS_WAIT_UNTIL([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=2")]) ++AT_CHECK([test 1 = $(as hv1 ovs-ofctl dump-flows br-int table=44 | grep -c "conj_id=3")]) ++ ++OVN_CLEANUP([hv1]) ++ ++AT_CLEANUP ++ + AT_SETUP([ovn -- Delete Port_Binding and OVS port Incremental Processing]) + ovn_start + +-- +2.26.2 + diff --git a/SOURCES/0002-ovn-northd-Fix-memory-leak-and-incorrect-limiting-of.patch b/SOURCES/0002-ovn-northd-Fix-memory-leak-and-incorrect-limiting-of.patch deleted file mode 100644 index b82edf3..0000000 --- a/SOURCES/0002-ovn-northd-Fix-memory-leak-and-incorrect-limiting-of.patch +++ /dev/null @@ -1,47 +0,0 @@ -From ccaeae4261bc7d35d48a511f71cfc25728786b24 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets -Date: Tue, 5 May 2020 13:08:50 +0200 -Subject: [PATCH 2/4] ovn-northd: Fix memory leak and incorrect limiting of - ECMP routes. - -If route count reaches UINT16_MAX, ecmp_groups_add_route() will leak the -allocated route structure.
Also, since group->route_count incremented -unconditionally, next attempt to add new route will succeed, because the -value of 'route_count' is zero now and out of sync with the real number -of routes. - -Fixes: 4e53974bdc4e ("ovn-northd: Support ECMP routes.") -Signed-off-by: Ilya Maximets -Acked-by: Mark Michelson -Signed-off-by: Han Zhou ---- - northd/ovn-northd.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index ec77ae1a8..dc647d7c5 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -7188,15 +7188,15 @@ static void - ecmp_groups_add_route(struct ecmp_groups_node *group, - const struct parsed_route *route) - { -- struct ecmp_route_list_node *er = xmalloc(sizeof *er); -- er->route = route; -- er->id = ++group->route_count; -- if (er->id == 0) { -+ if (group->route_count == UINT16_MAX) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_WARN_RL(&rl, "too many routes in a single ecmp group."); - return; - } - -+ struct ecmp_route_list_node *er = xmalloc(sizeof *er); -+ er->route = route; -+ er->id = ++group->route_count; - ovs_list_insert(&group->route_list, &er->list_node); - } - --- -2.26.2 - diff --git a/SOURCES/0002-ovn-northd-Fix-memory-leak-in-case-of-duplicate-logi.patch b/SOURCES/0002-ovn-northd-Fix-memory-leak-in-case-of-duplicate-logi.patch deleted file mode 100644 index 3c0eb4c..0000000 --- a/SOURCES/0002-ovn-northd-Fix-memory-leak-in-case-of-duplicate-logi.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 2fdce8ec0e631b759bb03a35457b17060605d887 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets -Date: Tue, 12 May 2020 12:46:18 +0200 -Subject: [PATCH 2/2] ovn-northd: Fix memory leak in case of duplicate logical - router port. - -'lrp_networks' must be destroyed on error path. 
- -Fixes: 8e83e561879a ("ovn: Support multiple addresses on a single logical router port.") -Signed-off-by: Ilya Maximets -Signed-off-by: Numan Siddique ---- - northd/ovn-northd.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index c1cdb2280..a41e3d46e 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -2125,6 +2125,7 @@ join_logical_ports(struct northd_context *ctx, - = VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_WARN_RL(&rl, "duplicate logical router port %s", - nbrp->name); -+ destroy_lport_addresses(&lrp_networks); - continue; - } - ovn_port_set_nb(op, NULL, nbrp); --- -2.26.2 - diff --git a/SOURCES/0002-ovn-northd-Fix-tunnel_key-allocation-for-SB-Port_Bin.patch b/SOURCES/0002-ovn-northd-Fix-tunnel_key-allocation-for-SB-Port_Bin.patch deleted file mode 100644 index 0da2b8f..0000000 --- a/SOURCES/0002-ovn-northd-Fix-tunnel_key-allocation-for-SB-Port_Bin.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 0cda584a6ee05830c0611650fe22c642cb39b09f Mon Sep 17 00:00:00 2001 -From: Dumitru Ceara -Date: Thu, 30 Apr 2020 20:32:35 +0200 -Subject: [PATCH 2/2] ovn-northd: Fix tunnel_key allocation for SB - Port_Bindings. - -When generating Port_Binding records ovn-northd tries to reuse the -tunnel_key value from the original SB record, if any available. - -However, there's no check for tunnel_keys that would conflict with -newly allocated keys for new records. In order to avoid that, we -don't reuse stale Port_Binding entries, i.e., their "datapath" field -doesn't match the Datapath_Binding record associated with the -logical switch/router they're part of. 
- -One way to reproduce the issue is: -$ ovn-nbctl ls-add ls1 -$ ovn-nbctl ls-add ls2 -$ ovn-nbctl lsp-add ls1 lsp1 -$ ovn-nbctl lsp-add ls2 lsp2 -$ ovn-nbctl --wait=sb sync -$ ovn-nbctl lsp-del lsp2 -- lsp-add ls1 lsp2 - -Another option to reproduce the issue is with HA_Chassis_Group: -$ ovn-nbctl ls-add ls1 -$ ovn-nbctl ls-add ls2 -$ ovn-nbctl lsp-add ls1 lsp1 -$ ovn-nbctl lsp-add ls2 lsp2 -$ ovn-nbctl lsp-set-type lsp2 external -$ ovn-nbctl ha-chassis-group-add chg1 -$ ovn-nbctl ha-chassis-group-add-chassis chg1 chassis-1 30 -$ chg1_uuid=$(ovn-nbctl --bare --columns _uuid list ha_Chassis_Group .) -$ ovn-nbctl set logical_switch_port lsp2 ha_chassis_group=${chg1_uuid} -$ ovn-nbctl lsp-del lsp2 -- lsp-add ls1 lsp2 - -Reported-by: Dan Williams -Reported-at: https://bugzilla.redhat.com/1828637 -Signed-off-by: Dumitru Ceara -Acked-by: Numan Siddique -Signed-off-by: Han Zhou -(cherry picked from upstream commit 8bf9075968ac8b26f1d4d32697f4b117a61a2c49) - -Change-Id: I03146f0778ff50375c3f843c0e650e76008bba94 ---- - northd/ovn-northd.c | 6 +++--- - tests/ovn-northd.at | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 59 insertions(+), 3 deletions(-) - -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index 5e649d0..bc1ea0b 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -2025,7 +2025,7 @@ join_logical_ports(struct northd_context *ctx, - const struct nbrec_logical_switch_port *nbsp - = od->nbs->ports[i]; - struct ovn_port *op = ovn_port_find(ports, nbsp->name); -- if (op) { -+ if (op && op->sb->datapath == od->sb) { - if (op->nbsp || op->nbrp) { - static struct vlog_rate_limit rl - = VLOG_RATE_LIMIT_INIT(5, 1); -@@ -2119,7 +2119,7 @@ join_logical_ports(struct northd_context *ctx, - } - - struct ovn_port *op = ovn_port_find(ports, nbrp->name); -- if (op) { -+ if (op && op->sb->datapath == od->sb) { - if (op->nbsp || op->nbrp) { - static struct vlog_rate_limit rl - = VLOG_RATE_LIMIT_INIT(5, 1); -@@ -2171,7 +2171,7 @@ 
join_logical_ports(struct northd_context *ctx, - char *redirect_name = - ovn_chassis_redirect_name(nbrp->name); - struct ovn_port *crp = ovn_port_find(ports, redirect_name); -- if (crp) { -+ if (crp && crp->sb->datapath == od->sb) { - crp->derived = true; - ovn_port_set_nb(crp, NULL, nbrp); - ovs_list_remove(&crp->list); -diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index 94f892b..569390c 100644 ---- a/tests/ovn-northd.at -+++ b/tests/ovn-northd.at -@@ -1350,3 +1350,59 @@ lr_uuid=$(ovn-nbctl --columns _uuid list Logical_Router .) - AT_CHECK[test ${nb_uuid} = ${lr_uuid}] - - AT_CLEANUP -+ -+AT_SETUP([ovn -- check reconcile stale tunnel keys]) -+ovn_start -+ -+ovn-nbctl ls-add ls1 -+ovn-nbctl ls-add ls2 -+ovn-nbctl lsp-add ls1 lsp1 -+ovn-nbctl lsp-add ls2 lsp2 -+AT_CHECK([ovn-nbctl --wait=sb sync], [0]) -+ -+# Ports are bound on different datapaths so it's expected that they both -+# get tunnel_key == 1. -+AT_CHECK([test 1 = $(ovn-sbctl --bare --columns tunnel_key find \ -+port_binding logical_port=lsp1)]) -+AT_CHECK([test 1 = $(ovn-sbctl --bare --columns tunnel_key find \ -+port_binding logical_port=lsp2)]) -+ -+ovn-nbctl lsp-del lsp2 -- lsp-add ls1 lsp2 -+AT_CHECK([ovn-nbctl --wait=sb sync], [0]) -+ -+AT_CHECK([test 1 = $(ovn-sbctl --bare --columns tunnel_key find \ -+port_binding logical_port=lsp1)]) -+AT_CHECK([test 2 = $(ovn-sbctl --bare --columns tunnel_key find \ -+port_binding logical_port=lsp2)]) -+ -+# ovn-northd should allocate a new tunnel_key for lsp1 or lsp2 to maintain -+# unique DB indices. -+AT_CHECK([test ${pb1_key} != ${pb2_key}]) -+ -+AT_CLEANUP -+ -+AT_SETUP([ovn -- check reconcile stale Ha_Chassis_Group]) -+ovn_start -+ -+ovn-nbctl ls-add ls1 -+ovn-nbctl ls-add ls2 -+ovn-nbctl lsp-add ls1 lsp1 -+ovn-nbctl lsp-add ls2 lsp2 -+ -+ovn-nbctl lsp-set-type lsp2 external -+ -+ovn-nbctl ha-chassis-group-add chg1 -+ovn-nbctl ha-chassis-group-add-chassis chg1 chassis-1 30 -+ -+chg1_uuid=$(ovn-nbctl --bare --columns _uuid list Ha_Chassis_Group .) 
-+ovn-nbctl set logical_switch_port lsp2 ha_chassis_group=${chg1_uuid} -+AT_CHECK([ovn-nbctl --wait=sb sync], [0]) -+ -+# Move lsp2 from ls2 to ls1. This should also remove the SB HA_Chassis_Group -+# record. -+ovn-nbctl lsp-del lsp2 -- lsp-add ls1 lsp2 -+AT_CHECK([ovn-nbctl --wait=sb sync], [0]) -+ -+AT_CHECK([test 0 = $(ovn-sbctl list Ha_Chassis_Group | wc -l)]) -+ -+AT_CLEANUP --- -1.8.3.1 - diff --git a/SOURCES/0002-ovn-northd-Skip-unsnat-flows-for-load-balancer-vips-.patch b/SOURCES/0002-ovn-northd-Skip-unsnat-flows-for-load-balancer-vips-.patch deleted file mode 100644 index c64e616..0000000 --- a/SOURCES/0002-ovn-northd-Skip-unsnat-flows-for-load-balancer-vips-.patch +++ /dev/null @@ -1,340 +0,0 @@ -From f075920452dbcaab7c185efd4f63a02bd6e384ce Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Thu, 26 Mar 2020 20:19:12 +0530 -Subject: [PATCH 2/2] ovn-northd: Skip unsnat flows for load balancer vips in - router ingress pipeline - -Suppose there is below NAT entry with external_ip = 172.168.0.100 - -nat - external ip: "172.168.0.100" - logical ip: "10.0.0.0/24" - type: "snat" - -And a load balancer with the VIP - 172.168.0.100 - -_uuid : -external_ids : {} -name : lb1 -protocol : tcp -vips : {"172.168.0.100:8080"="10.0.0.4:8080"} - -And if these are associated to a gateway logical router - -Then we will see the below lflows in the router pipeline - -... -table=5 (lr_in_unsnat ), priority=90 , match=(ip && ip4.dst == 172.168.0.100), action=(ct_snat;) -... -table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 172.168.0.100 && tcp && tcp.dst == 8080), action=(ct_lb(10.0.0.4:8080);) -table=6 (lr_in_dnat ), priority=120 , match=(ct.est && ip && ip4.dst == 172.168.0.100 && tcp && tcp.dst == 8080), action=(ct_dnat;) - -When a new connection packet destinated for the lb vip 172.168.0.100 and tcp.dst = 8080 -is received, the ct.new flow in the lr_in_dnat is hit and the packet's ip4.dst is -dnatted to 10.0.0.4 in the dnat conntrack zone. 
- -But for the subsequent packet destined to the vip, the ct.est lflow in the lr_in_dnat -stage doesn't get hit. In this case, the packet first hits the lr_in_unsnat pri 90 flow -as mentioned above with the action ct_snat. Even though ct_snat should have no effect, -looks like it is resetting the ct flags. - -In the case of tcp, the ct.new flow is hit instead of ct.est. In the the case of sctp, neither of the above -lflows in lr_in_dnat stage hit. - -This needs to be investigated further. But we can avoid this scenario in OVN -by adding the below lflow. - -table=5 (lr_in_unsnat ), priority=120 , match=(ip4 && ip4.dst == 172.168.0.100 && tcp.dst == 8080), action=(next;) - -This patch adds the above lflow if the lb vip also has an entry in the NAT table. - -This patch is also required to support sctp load balancers in OVN. - -Reported-by: Tim Rozet -Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1815217 -Signed-off-by: Numan Siddique -Acked-by: Mark Michelson -(cherry picked from upstream OVS branch20.03 commit 3cf40e8b35c5d6a4e593b42b96c284a8742235d5) - -Change-Id: Ibf609537189e80a9e69e6c968b8e4041ecc9cc40 ---- - northd/ovn-northd.8.xml | 27 +++++++++++++++++++ - northd/ovn-northd.c | 59 +++++++++++++++++++++++++++++++---------- - tests/ovn-northd.at | 38 ++++++++++++++++++++++++++ - tests/system-ovn.at | 51 ++++++++++++++++++++++++++++++++++- - 4 files changed, 160 insertions(+), 15 deletions(-) - -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index 1e0993e07..b5e4d6d84 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -2075,6 +2075,33 @@ icmp6 { - unSNATted here. -

            - -+

            Ingress Table 5: UNSNAT on Gateway and Distributed Routers

            -+
              -+
            • -+

              -+ If the Router (Gateway or Distributed) is configured with -+ load balancers, then below lflows are added: -+

              -+ -+

              -+ For each IPv4 address A defined as load balancer -+ VIP with the protocol P (and the protocol port -+ T if defined) is also present as an -+ external_ip in the NAT table, -+ a priority-120 logical flow is added with the match -+ ip4 && ip4.dst == A && -+ P with the action next; to -+ advance the packet to the next table. If the load balancer -+ has protocol port B defined, then the match also has -+ P.dst == B. -+

              -+ -+

              -+ The above flows are also added for IPv6 load balancers. -+

              -+
            • -+
            -+ -

            Ingress Table 5: UNSNAT on Gateway Routers

            - -
              -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index cdaeff401..75c19df62 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -7570,7 +7570,7 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, - struct ds *match, struct ds *actions, int priority, - const char *lb_force_snat_ip, struct lb_vip *lb_vip, - bool is_udp, struct nbrec_load_balancer *lb, -- struct shash *meter_groups) -+ struct shash *meter_groups, struct sset *nat_entries) - { - build_empty_lb_event_flow(od, lflows, lb_vip, lb, S_ROUTER_IN_DNAT, - meter_groups); -@@ -7603,6 +7603,40 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, - free(new_match); - free(est_match); - -+ const char *ip_match = NULL; -+ if (lb_vip->addr_family == AF_INET) { -+ ip_match = "ip4"; -+ } else { -+ ip_match = "ip6"; -+ } -+ -+ if (sset_contains(nat_entries, lb_vip->vip)) { -+ /* The load balancer vip is also present in the NAT entries. -+ * So add a high priority lflow to advance the the packet -+ * destined to the vip (and the vip port if defined) -+ * in the S_ROUTER_IN_UNSNAT stage. -+ * There seems to be an issue with ovs-vswitchd. When the new -+ * connection packet destined for the lb vip is received, -+ * it is dnat'ed in the S_ROUTER_IN_DNAT stage in the dnat -+ * conntrack zone. For the next packet, if it goes through -+ * unsnat stage, the conntrack flags are not set properly, and -+ * it doesn't hit the established state flows in -+ * S_ROUTER_IN_DNAT stage. */ -+ struct ds unsnat_match = DS_EMPTY_INITIALIZER; -+ ds_put_format(&unsnat_match, "%s && %s.dst == %s && %s", -+ ip_match, ip_match, lb_vip->vip, -+ is_udp ? "udp" : "tcp"); -+ if (lb_vip->vip_port) { -+ ds_put_format(&unsnat_match, " && %s.dst == %d", -+ is_udp ? 
"udp" : "tcp", lb_vip->vip_port); -+ } -+ -+ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, 120, -+ ds_cstr(&unsnat_match), "next;", &lb->header_); -+ -+ ds_destroy(&unsnat_match); -+ } -+ - if (!od->l3dgw_port || !od->l3redirect_port || !lb_vip->n_backends) { - return; - } -@@ -7612,19 +7646,11 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, - * router has a gateway router port associated. - */ - struct ds undnat_match = DS_EMPTY_INITIALIZER; -- if (lb_vip->addr_family == AF_INET) { -- ds_put_cstr(&undnat_match, "ip4 && ("); -- } else { -- ds_put_cstr(&undnat_match, "ip6 && ("); -- } -+ ds_put_format(&undnat_match, "%s && (", ip_match); - - for (size_t i = 0; i < lb_vip->n_backends; i++) { - struct lb_vip_backend *backend = &lb_vip->backends[i]; -- if (backend->addr_family == AF_INET) { -- ds_put_format(&undnat_match, "(ip4.src == %s", backend->ip); -- } else { -- ds_put_format(&undnat_match, "(ip6.src == %s", backend->ip); -- } -+ ds_put_format(&undnat_match, "(%s.src == %s", ip_match, backend->ip); - - if (backend->port) { - ds_put_format(&undnat_match, " && %s.src == %d) || ", -@@ -8875,6 +8901,11 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - &nat->header_); - sset_add(&nat_entries, nat->external_ip); - } -+ } else { -+ /* Add the NAT external_ip to the nat_entries even for -+ * gateway routers. This is required for adding load balancer -+ * flows.*/ -+ sset_add(&nat_entries, nat->external_ip); - } - - /* Egress UNDNAT table: It is for already established connections' -@@ -9055,8 +9086,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - } - } - -- sset_destroy(&nat_entries); -- - /* Handle force SNAT options set in the gateway router. 
*/ - if (dnat_force_snat_ip && !od->l3dgw_port) { - /* If a packet with destination IP address as that of the -@@ -9117,6 +9146,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - /* Load balancing and packet defrag are only valid on - * Gateway routers or router with gateway port. */ - if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { -+ sset_destroy(&nat_entries); - continue; - } - -@@ -9191,10 +9221,11 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - } - add_router_lb_flow(lflows, od, &match, &actions, prio, - lb_force_snat_ip, lb_vip, is_udp, -- nb_lb, meter_groups); -+ nb_lb, meter_groups, &nat_entries); - } - } - sset_destroy(&all_ips); -+ sset_destroy(&nat_entries); - } - - /* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router -diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index a2989e78e..d127152f5 100644 ---- a/tests/ovn-northd.at -+++ b/tests/ovn-northd.at -@@ -1288,3 +1288,41 @@ ovn-nbctl --wait=sb lb-del lb2 - OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`]) - - AT_CLEANUP -+ -+AT_SETUP([ovn -- Load balancer VIP in NAT entries]) -+AT_SKIP_IF([test $HAVE_PYTHON = no]) -+ovn_start -+ -+ovn-nbctl lr-add lr0 -+ovn-nbctl lrp-add lr0 lr0-public 00:00:01:01:02:04 192.168.2.1/24 -+ovn-nbctl lrp-add lr0 lr0-join 00:00:01:01:02:04 10.10.0.1/24 -+ -+ovn-nbctl set logical_router lr0 options:chassis=ch1 -+ -+ovn-nbctl lb-add lb1 "192.168.2.1:8080" "10.0.0.4:8080" -+ovn-nbctl lb-add lb2 "192.168.2.4:8080" "10.0.0.5:8080" udp -+ovn-nbctl lb-add lb3 "192.168.2.5:8080" "10.0.0.6:8080" -+ovn-nbctl lb-add lb4 "192.168.2.6:8080" "10.0.0.7:8080" -+ -+ovn-nbctl lr-lb-add lr0 lb1 -+ovn-nbctl lr-lb-add lr0 lb2 -+ovn-nbctl lr-lb-add lr0 lb3 -+ovn-nbctl lr-lb-add lr0 lb4 -+ -+ovn-nbctl lr-nat-add lr0 snat 192.168.2.1 10.0.0.0/24 -+ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.2.4 10.0.0.4 -+ovn-nbctl lr-nat-add lr0 dnat 192.168.2.5 10.0.0.5 -+ -+OVS_WAIT_UNTIL([test 1 = 
$(ovn-sbctl dump-flows lr0 | grep lr_in_unsnat | \ -+grep "ip4 && ip4.dst == 192.168.2.1 && tcp && tcp.dst == 8080" -c) ]) -+ -+AT_CHECK([test 1 = $(ovn-sbctl dump-flows lr0 | grep lr_in_unsnat | \ -+grep "ip4 && ip4.dst == 192.168.2.4 && udp && udp.dst == 8080" -c) ]) -+ -+AT_CHECK([test 1 = $(ovn-sbctl dump-flows lr0 | grep lr_in_unsnat | \ -+grep "ip4 && ip4.dst == 192.168.2.5 && tcp && tcp.dst == 8080" -c) ]) -+ -+AT_CHECK([test 0 = $(ovn-sbctl dump-flows lr0 | grep lr_in_unsnat | \ -+grep "ip4 && ip4.dst == 192.168.2.6 && tcp && tcp.dst == 8080" -c) ]) -+ -+AT_CLEANUP -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 3b3379840..f1ae69b20 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -1635,7 +1635,6 @@ ovn-nbctl set load_balancer $uuid vips:'"30.0.0.2:8000"'='"192.168.1.2:80,192.16 - ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.2.2 \ - external_ip=30.0.0.2 -- add logical_router R2 nat @nat - -- - # Wait for ovn-controller to catch up. - ovn-nbctl --wait=hv sync - OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ -@@ -1671,6 +1670,56 @@ tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(sr - tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) - ]) - -+check_est_flows () { -+ n=$(ovs-ofctl dump-flows br-int table=14 | grep \ -+"priority=120,ct_state=+est+trk,tcp,metadata=0x2,nw_dst=30.0.0.2,tp_dst=8000" \ -+| grep nat | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') -+ -+ echo "n_packets=$n" -+ test "$n" != 0 -+} -+ -+OVS_WAIT_UNTIL([check_est_flows], [check established flows]) -+ -+ -+ovn-nbctl set logical_router R2 options:lb_force_snat_ip="20.0.0.2" -+ -+# Destroy the load balancer and create again. ovn-controller will -+# clear the OF flows and re add again and clears the n_packets -+# for these flows. 
-+ovn-nbctl destroy load_balancer $uuid -+uuid=`ovn-nbctl create load_balancer vips:30.0.0.1="192.168.1.2,192.168.2.2"` -+ovn-nbctl set logical_router R2 load_balancer=$uuid -+ -+# Config OVN load-balancer with another VIP (this time with ports). -+ovn-nbctl set load_balancer $uuid vips:'"30.0.0.2:8000"'='"192.168.1.2:80,192.168.2.2:80"' -+ -+ovn-nbctl list load_balancer -+ovn-sbctl dump-flows R2 -+OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-flows br-int table=41 | \ -+grep 'nat(src=20.0.0.2)']) -+ -+dnl Test load-balancing that includes L4 ports in NAT. -+for i in `seq 1 20`; do -+ echo Request $i -+ NS_CHECK_EXEC([alice1], [wget 30.0.0.2:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) -+done -+ -+dnl Each server should have at least one connection. -+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | -+sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl -+tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) -+tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) -+]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.2) | -+sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl -+tcp,orig=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=20.0.0.2,sport=,dport=),zone=,protoinfo=(state=) -+tcp,orig=(src=172.16.1.2,dst=192.168.2.2,sport=,dport=),reply=(src=192.168.2.2,dst=20.0.0.2,sport=,dport=),zone=,protoinfo=(state=) -+]) -+ -+OVS_WAIT_UNTIL([check_est_flows], [check established flows]) -+ - OVS_APP_EXIT_AND_WAIT([ovn-controller]) - - as ovn-sb --- -2.25.1 - diff --git a/SOURCES/0003-IPv6-PD-Disable-pd-processing-if-the-router-port-is-.patch b/SOURCES/0003-IPv6-PD-Disable-pd-processing-if-the-router-port-is-.patch deleted file mode 100644 index 57864aa..0000000 --- a/SOURCES/0003-IPv6-PD-Disable-pd-processing-if-the-router-port-is-.patch +++ /dev/null @@ -1,37 +0,0 @@ 
-From fe499ffd96efda2ffd3f7f066faebe6ec41a83f4 Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: <0b9d16670d5561d8300d2448cbd4686a3acdc57e.1588608928.git.lorenzo.bianconi@redhat.com> -References: <0b9d16670d5561d8300d2448cbd4686a3acdc57e.1588608928.git.lorenzo.bianconi@redhat.com> -From: Lorenzo Bianconi -Date: Wed, 29 Apr 2020 18:05:31 +0200 -Subject: [PATCH 3/3] IPv6 PD: Disable pd processing if the router port is - disabled. - -Tested-by: Jianlin Shi -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - northd/ovn-northd.c | 6 ++++++ - 1 file changed, 6 insertions(+) - ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -9373,12 +9373,18 @@ build_lrouter_flows(struct hmap *datapat - /* enable IPv6 prefix delegation */ - bool prefix_delegation = smap_get_bool(&op->nbrp->options, - "prefix_delegation", false); -+ if (!lrport_is_enabled(op->nbrp)) { -+ prefix_delegation = false; -+ } - smap_add(&options, "ipv6_prefix_delegation", - prefix_delegation ? "true" : "false"); - sbrec_port_binding_set_options(op->sb, &options); - - bool ipv6_prefix = smap_get_bool(&op->nbrp->options, - "prefix", false); -+ if (!lrport_is_enabled(op->nbrp)) { -+ ipv6_prefix = false; -+ } - smap_add(&options, "ipv6_prefix", - ipv6_prefix ? "true" : "false"); - sbrec_port_binding_set_options(op->sb, &options); diff --git a/SOURCES/0003-Support-packet-metadata-marking-for-logical-router-p.patch b/SOURCES/0003-Support-packet-metadata-marking-for-logical-router-p.patch new file mode 100644 index 0000000..8cfd40b --- /dev/null +++ b/SOURCES/0003-Support-packet-metadata-marking-for-logical-router-p.patch @@ -0,0 +1,455 @@ +From 74b88cd58dcfdb5e80fa2d64a909170b5a24f76a Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Wed, 17 Jun 2020 23:39:29 +0530 +Subject: [PATCH 03/22] Support packet metadata marking for logical router + policies. + +This patch adds a new column 'options' of type smap in the +Logical_Router_Policy table in the NB DB and supports the key 'pkt_mark'. 
+CMS can set a desired value for this key in the 'options' column. When this +router policy is applied, the packet metadata is marked with the specified +value (to the NXM_NX_PKT_MARK OVS field). + +In the case of Linux, this corresponds to struct sk_buff's "skb_mark" +member and this mark can be seen by the linux networking subsystem. +CMS can inspect this value (as an iptables rule or adding an OF flow +in another ovs bridge) and take appropriate action when the marked packet +leaves the integration bridge via the patch port. + +Change-Id: I05efa5e1d77bb02bf59d28b174151ba33779b8c3 +Requested-at: https://bugzilla.redhat.com/show_bug.cgi?id=1828933 +Requested-by: Alexander Constantinescu +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit a123ef0fb8fd03cc586f924cb511fb24a661743a) +--- + NEWS | 2 + + lib/logical-fields.c | 2 + + northd/ovn-northd.c | 8 ++ + ovn-nb.ovsschema | 7 +- + ovn-nb.xml | 9 ++ + ovn-sb.xml | 1 + + tests/ovn.at | 291 +++++++++++++++++++++++++++++++++++++++++++ + 7 files changed, 318 insertions(+), 2 deletions(-) + +diff --git a/NEWS b/NEWS +index 30b24e5ad..8abdc95b9 100644 +--- a/NEWS ++++ b/NEWS +@@ -7,6 +7,8 @@ OVN v20.06.2 - 21 Aug 2020 + + OVN v20.06.1 - 08 Jul 2020 + -------------------------- ++ - Added packet marking support for traffic routed with ++ a routing policy. 
+ + OVN v20.06.0 - 08 Jun 2020 + -------------------------- +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index a007085b3..8ad56aa53 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -254,6 +254,8 @@ ovn_init_symtab(struct shash *symtab) + expr_symtab_add_field(symtab, "sctp.src", MFF_SCTP_SRC, "sctp", false); + expr_symtab_add_field(symtab, "sctp.dst", MFF_SCTP_DST, "sctp", false); + ++ expr_symtab_add_field(symtab, "pkt.mark", MFF_PKT_MARK, NULL, false); ++ + expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU); + } + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index a665d52e9..2b1257114 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -7386,6 +7386,10 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, + rule->priority, rule->nexthop); + return; + } ++ uint32_t pkt_mark = smap_get_int(&rule->options, "pkt_mark", 0); ++ if (pkt_mark) { ++ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); ++ } + bool is_ipv4 = strchr(rule->nexthop, '.') ? 
true : false; + ds_put_format(&actions, "%s = %s; " + "%s = %s; " +@@ -7403,6 +7407,10 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, + } else if (!strcmp(rule->action, "drop")) { + ds_put_cstr(&actions, "drop;"); + } else if (!strcmp(rule->action, "allow")) { ++ uint32_t pkt_mark = smap_get_int(&rule->options, "pkt_mark", 0); ++ if (pkt_mark) { ++ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); ++ } + ds_put_cstr(&actions, "next;"); + } + ds_put_format(&match, "%s", rule->match); +diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema +index a06972aa0..da9af7157 100644 +--- a/ovn-nb.ovsschema ++++ b/ovn-nb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Northbound", +- "version": "5.23.0", +- "cksum": "111023208 25806", ++ "version": "5.24.0", ++ "cksum": "1092394564 25961", + "tables": { + "NB_Global": { + "columns": { +@@ -379,6 +379,9 @@ + "key": {"type": "string", + "enum": ["set", ["allow", "drop", "reroute"]]}}}, + "nexthop": {"type": {"key": "string", "min": 0, "max": 1}}, ++ "options": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 0fdc1592b..02161372a 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -2581,6 +2581,15 @@ +

              + + ++ ++

              ++ Marks the packet with the value specified when the router policy ++ is applied. CMS can inspect this packet marker and take some decisions ++ if desired. This value is not preserved when the packet goes out on the ++ wire. ++

              ++
              ++ + + + See External IDs at the beginning of this document. +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 293b0920c..709cb4c48 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -975,6 +975,7 @@ +
            • xxreg0 xxreg1
            • +
            • inport outport
            • +
            • flags.loopback
            • ++
            • pkt.mark
            • +
            • eth.src eth.dst eth.type
            • +
            • vlan.tci vlan.vid vlan.pcp vlan.present
            • +
            • ip.proto ip.dscp ip.ecn ip.ttl ip.frag
            • +diff --git a/tests/ovn.at b/tests/ovn.at +index be677b663..8ce45823f 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -965,6 +965,23 @@ ip.ttl--; + ip.ttl + Syntax error at end of input expecting `--'. + ++# Packet mark. ++pkt.mark=1; ++ formats as pkt.mark = 1; ++ encodes as set_field:0x1->pkt_mark ++ ++pkt.mark = 1000; ++ encodes as set_field:0x3e8->pkt_mark ++ ++pkt.mark; ++ Syntax error at `pkt.mark' expecting action. ++ ++pkt.mark = foo; ++ Syntax error at `foo' expecting field name. ++ ++pkt.mark = "foo"; ++ Integer field pkt.mark is not compatible with string constant. ++ + # load balancing. + ct_lb; + encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat) +@@ -20199,6 +20216,280 @@ AT_CHECK([ovn-nbctl --if-exists lr-nat-del r1 snat beef:0000::0/ffff:ffff:ffff:f + AT_CHECK([ovn-nbctl --if-exists lr-nat-del r1 dnat aef0:0000:00::1]) + AT_CLEANUP + ++AT_SETUP([ovn -- Logical router policy packet marking]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ovs-vsctl -- add-port br-int hv1-vif1 -- \ ++ set interface hv1-vif1 external-ids:iface-id=sw0-port1 \ ++ options:tx_pcap=hv1/vif1-tx.pcap \ ++ options:rxq_pcap=hv1/vif1-rx.pcap \ ++ ofport-request=1 ++ovs-vsctl -- add-port br-int hv1-vif2 -- \ ++ set interface hv1-vif2 external-ids:iface-id=sw0-port2 \ ++ options:tx_pcap=hv1/vif2-tx.pcap \ ++ options:rxq_pcap=hv1/vif2-rx.pcap \ ++ ofport-request=2 ++ ++as hv1 ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=public:br-phys ++ ++ovn-nbctl ls-add sw0 ++ovn-nbctl lsp-add sw0 sw0-port1 ++ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 10.0.0.5" ++ovn-nbctl lsp-set-port-security sw0-port1 "50:54:00:00:00:03 10.0.0.3 10.0.0.5" ++ ++ovn-nbctl lsp-add sw0 sw0-port2 ++ovn-nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 aef0::4" ++ ++ovn-nbctl lr-add lr0 ++ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 aef0::1/64 ++ovn-nbctl lsp-add sw0 sw0-lr0 ++ovn-nbctl lsp-set-type sw0-lr0 router ++ovn-nbctl lsp-set-addresses sw0-lr0 router ++ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 ++ ++ovn-nbctl ls-add public ++ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 bef0::1/64 ++ovn-nbctl lsp-add public public-lr0 ++ovn-nbctl lsp-set-type public-lr0 router ++ovn-nbctl lsp-set-addresses public-lr0 router ++ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public ++ ++# localnet port ++ovn-nbctl lsp-add public ln-public ++ovn-nbctl lsp-set-type ln-public localnet ++ovn-nbctl lsp-set-addresses ln-public unknown ++ovn-nbctl lsp-set-options ln-public network_name=public ++ ++ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20 ++ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24 ++lr0_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding lr0) ++ovn-sbctl create mac_binding datapath=$lr0_dp_uuid ip=172.168.0.120 \ ++logical_port=lr0-public mac="10\:54\:00\:00\:00\:03" ++ovn-sbctl create mac_binding datapath=$lr0_dp_uuid ip=172.168.0.200 \ ++logical_port=lr0-public mac="10\:54\:00\:00\:00\:04" ++ovn-sbctl create mac_binding datapath=$lr0_dp_uuid ip="bef0\:\:4" \ ++logical_port=lr0-public mac="10\:54\:00\:00\:00\:05" ++ovn-sbctl create mac_binding datapath=$lr0_dp_uuid ip="bef0\:\:5" \ ++logical_port=lr0-public mac="10\:54\:00\:00\:00\:06" ++ovn-sbctl create mac_binding datapath=$lr0_dp_uuid ip="bef0\:\:6" \ ++logical_port=lr0-public mac="10\:54\:00\:00\:00\:07" ++ ++ovn-nbctl 
-- --id=@lrt create Logical_Router_Static_Route \ ++ip_prefix="\:\:/64" nexthop="bef0\:\:4" -- add Logical_Router lr0 \ ++static_routes @lrt ++ ++ovn-nbctl --wait=hv sync ++ ++# Add logical router policy and set pkt_mark on it. ++ovn-nbctl lr-policy-add lr0 2000 "ip4.src == 10.0.0.3" allow ++ovn-nbctl lr-policy-add lr0 1000 "ip4.src == 10.0.0.4" allow ++ovn-nbctl lr-policy-add lr0 900 "ip4.src == 10.0.0.5" reroute 172.168.0.200 ++ovn-nbctl lr-policy-add lr0 2001 "ip6.dst == bef0::5" reroute bef0::6 ++ovn-nbctl lr-policy-add lr0 1001 "ip6" allow ++ ++ ++pol1=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=2000) ++pol3=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=900) ++pol4=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=2001) ++pol5=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=1001) ++ ++ovn-nbctl set logical_router_policy $pol1 options:pkt_mark=100 ++ovn-nbctl set logical_router_policy $pol3 options:pkt_mark=3 ++ovn-nbctl set logical_router_policy $pol4 options:pkt_mark=4 ++ovn-nbctl set logical_router_policy $pol5 options:pkt_mark=5 ++ovn-nbctl --wait=hv sync ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ grep "load:0x64->NXM_NX_PKT_MARK" -c) ++]) ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ grep "load:0x3->NXM_NX_PKT_MARK" -c) ++]) ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ grep "load:0x4->NXM_NX_PKT_MARK" -c) ++]) ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ grep "load:0x5->NXM_NX_PKT_MARK" -c) ++]) ++ ++AT_CHECK([as hv1 ovs-ofctl del-flows br-phys]) ++AT_DATA([flows.txt], [dnl ++table=0, priority=0 actions=NORMAL ++table=0, priority=200 arp,actions=drop ++table=0, priority=100, pkt_mark=0x64 actions=drop ++table=0, priority=100, pkt_mark=0x2 actions=drop ++table=0, priority=100, pkt_mark=0x3 
actions=drop ++table=0, priority=100, pkt_mark=0x4 actions=drop ++table=0, priority=100, pkt_mark=0x5 actions=drop ++]) ++ ++AT_CHECK([as hv1 ovs-ofctl --protocols=OpenFlow13 add-flows br-phys flows.txt]) ++ ++ip_to_hex() { ++ printf "%02x%02x%02x%02x" "$@" ++} ++ ++send_ipv4_pkt() { ++ local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 ++ local ip_src=$5 ip_dst=$6 ++ packet=${eth_dst}${eth_src}08004500001c0000000040110000${ip_src}${ip_dst}0035111100080000 ++ as $hv ovs-appctl netdev-dummy/receive ${inport} ${packet} ++} ++ ++send_icmp6_packet() { ++ local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ++ ++ local ip6_hdr=6000000000083aff${ipv6_src}${ipv6_dst} ++ local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000dcb662f00001 ++ ++ as $hv ovs-appctl netdev-dummy/receive ${inport} ${packet} ++} ++ ++send_ipv4_pkt hv1 hv1-vif1 505400000003 00000000ff01 \ ++ $(ip_to_hex 10 0 0 3) $(ip_to_hex 172 168 0 120) ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x64" | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep priority=0 | \ ++ grep "n_packets=0" -c) ++]) ++ ++# Send the pkt from sw0-port2. Packet should not be marked. 
++send_ipv4_pkt hv1 hv1-vif2 505400000004 00000000ff01 \ ++ $(ip_to_hex 10 0 0 4) $(ip_to_hex 172 168 0 120) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep priority=0 | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x64" | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x3" | \ ++ grep "n_packets=0" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x4" | \ ++ grep "n_packets=0" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x5" | \ ++ grep "n_packets=0" -c) ++]) ++ ++ovn-nbctl set logical_router_policy $pol1 options:pkt_mark=2 ++send_ipv4_pkt hv1 hv1-vif1 505400000003 00000000ff01 \ ++ $(ip_to_hex 10 0 0 3) $(ip_to_hex 172 168 0 120) ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ grep "load:0x2->NXM_NX_PKT_MARK" -c) ++]) ++ ++AT_CHECK([ ++ test 0 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ grep "load:0x64->NXM_NX_PKT_MARK" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x2" | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep priority=0 | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x3" | \ ++ grep "n_packets=0" -c) ++]) ++ ++# Send with src ip 10.0.0.5. The reroute policy should be hit ++# and the packet should be marked with 3. 
++send_ipv4_pkt hv1 hv1-vif1 505400000003 00000000ff01 \ ++ $(ip_to_hex 10 0 0 5) $(ip_to_hex 172 168 0 120) ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x3" | \ ++ grep "n_packets=1" -c) ++]) ++ ++# Send IPv6 traffic. ++src_ip6=aef00000000000000000000000000004 ++dst_ip6=bef00000000000000000000000000004 ++ ++send_icmp6_packet hv1 hv1-vif2 505400000004 00000000ff01 ${src_ip6} ${dst_ip6} ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x5" | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x4" | \ ++ grep "n_packets=0" -c) ++]) ++ ++# Send IPv6 packet which hits the reroute policy. Packet should be marked ++# with 4. ++ ++src_ip6=aef00000000000000000000000000004 ++dst_ip6=bef00000000000000000000000000005 ++ ++send_icmp6_packet hv1 hv1-vif2 505400000004 00000000ff01 ${src_ip6} ${dst_ip6} ++ ++OVS_WAIT_UNTIL([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x4" | \ ++ grep "n_packets=1" -c) ++]) ++ ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-phys table=0 | \ ++ grep "priority=100,pkt_mark=0x5" | \ ++ grep "n_packets=1" -c) ++]) ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP ++ + AT_SETUP([ovn -- Load balancer selection fields]) + AT_KEYWORDS([lb]) + ovn_start +-- +2.26.2 + diff --git a/SOURCES/0003-Support-selection-fields-in-load-balancer.patch b/SOURCES/0003-Support-selection-fields-in-load-balancer.patch deleted file mode 100644 index 97a7296..0000000 --- a/SOURCES/0003-Support-selection-fields-in-load-balancer.patch +++ /dev/null @@ -1,741 +0,0 @@ -From 2ee7fb81b5f396972ce279547456fd6d57891180 Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Wed, 22 Apr 2020 18:03:57 +0530 -Subject: [PATCH 3/4] Support selection fields in load balancer. 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch add a new column 'selection_fields' in Load_Balancer -table in NB DB. CMS can define a set of packet headers to use -while selecting a backend. If this column is set, OVN will add the -flow in group table with selection method as 'hash' with the set fields. -Otherwise it will use the default 'dp_hash' selection method. - -If a load balancer is configured with the selection_fields as -selection_fields : [ip_dst, ip_src, tp_dst, tp_src] - -then with this patch, the modified ct_lb action will look like - - ct_lb(backends=IP1:P1,IP2:P1; hash_fields="ip_dst,ip_src,tp_dst,tp_src"); - -And the OF flow will look like - - group_id=2,type=select,selection_method=hash, - fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(.... - -Change-Id: Iac595d50d77783fd28bcb1af7b63e9274b94f622 -Tested-by: Maciej Józefczyk -Acked-by: Maciej Józefczyk -Acked-by: Han Zhou -Acked-by: Mark Michelson -Signed-off-by: Numan Siddique ---- - NEWS | 6 ++ - include/ovn/actions.h | 1 + - lib/actions.c | 45 +++++++++-- - northd/ovn-northd.c | 30 ++++++-- - ovn-nb.ovsschema | 10 ++- - ovn-nb.xml | 27 +++++++ - tests/ovn-northd.at | 24 +++--- - tests/ovn.at | 49 +++++++----- - tests/system-ovn.at | 172 +++++++++++++++++++++++++++++++++++++++--- - 9 files changed, 311 insertions(+), 53 deletions(-) - -diff --git a/NEWS b/NEWS -index e77343c89..15c3453f8 100644 ---- a/NEWS -+++ b/NEWS -@@ -9,6 +9,12 @@ OVN v20.03.0 - 28 Feb 2020 - - Added support for ECMP routes in OVN router. - - Added IPv6 Prefix Delegation support in OVN. - - OVN now uses OpenFlow 1.5. -+ - Added support to choose selection methods - dp_hash or -+ hash (with specified hash fields) for OVN load balancer -+ backend selection. This is incompatible with older versions. 
-+ Care should be taken while upgrading as the existing -+ load balancer traffic will be affected if ovn-controllers -+ are not stopped before uprading northd services. - - - OVN Interconnection: - * Support for L3 interconnection of multiple OVN deployments with tunnels -diff --git a/include/ovn/actions.h b/include/ovn/actions.h -index e3dec99b2..df11a5713 100644 ---- a/include/ovn/actions.h -+++ b/include/ovn/actions.h -@@ -252,6 +252,7 @@ struct ovnact_ct_lb { - struct ovnact_ct_lb_dst *dsts; - size_t n_dsts; - uint8_t ltable; /* Logical table ID of next table. */ -+ char *hash_fields; - }; - - struct ovnact_select_dst { -diff --git a/lib/actions.c b/lib/actions.c -index 605dbffe4..ee7ccae0d 100644 ---- a/lib/actions.c -+++ b/lib/actions.c -@@ -900,9 +900,18 @@ parse_ct_lb_action(struct action_context *ctx) - struct ovnact_ct_lb_dst *dsts = NULL; - size_t allocated_dsts = 0; - size_t n_dsts = 0; -+ char *hash_fields = NULL; - -- if (lexer_match(ctx->lexer, LEX_T_LPAREN)) { -- while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { -+ if (lexer_match(ctx->lexer, LEX_T_LPAREN) && -+ !lexer_match(ctx->lexer, LEX_T_RPAREN)) { -+ if (!lexer_match_id(ctx->lexer, "backends") || -+ !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { -+ lexer_syntax_error(ctx->lexer, "expecting backends"); -+ return; -+ } -+ -+ while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && -+ !lexer_match(ctx->lexer, LEX_T_RPAREN)) { - struct ovnact_ct_lb_dst dst; - if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) { - /* IPv6 address and port */ -@@ -969,12 +978,27 @@ parse_ct_lb_action(struct action_context *ctx) - } - dsts[n_dsts++] = dst; - } -+ -+ if (lexer_match_id(ctx->lexer, "hash_fields")) { -+ if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || -+ ctx->lexer->token.type != LEX_T_STRING || -+ lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { -+ lexer_syntax_error(ctx->lexer, "invalid hash_fields"); -+ free(dsts); -+ return; -+ } -+ -+ hash_fields = xstrdup(ctx->lexer->token.s); -+ lexer_get(ctx->lexer); -+ 
lexer_get(ctx->lexer); -+ } - } - - struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts); - cl->ltable = ctx->pp->cur_ltable + 1; - cl->dsts = dsts; - cl->n_dsts = n_dsts; -+ cl->hash_fields = hash_fields; - } - - static void -@@ -982,10 +1006,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s) - { - ds_put_cstr(s, "ct_lb"); - if (cl->n_dsts) { -- ds_put_char(s, '('); -+ ds_put_cstr(s, "(backends="); - for (size_t i = 0; i < cl->n_dsts; i++) { - if (i) { -- ds_put_cstr(s, ", "); -+ ds_put_char(s, ','); - } - - const struct ovnact_ct_lb_dst *dst = &cl->dsts[i]; -@@ -1005,7 +1029,13 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s) - } - } - ds_put_char(s, ')'); -+ -+ if (cl->hash_fields) { -+ ds_chomp(s, ')'); -+ ds_put_format(s, "; hash_fields=\"%s\")", cl->hash_fields); -+ } - } -+ - ds_put_char(s, ';'); - } - -@@ -1052,7 +1082,11 @@ encode_CT_LB(const struct ovnact_ct_lb *cl, - : MFF_LOG_DNAT_ZONE - MFF_REG0; - - struct ds ds = DS_EMPTY_INITIALIZER; -- ds_put_format(&ds, "type=select,selection_method=dp_hash"); -+ ds_put_format(&ds, "type=select,selection_method=%s", -+ cl->hash_fields ? "hash": "dp_hash"); -+ if (cl->hash_fields) { -+ ds_put_format(&ds, ",fields(%s)", cl->hash_fields); -+ } - - BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0); - BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS); -@@ -1094,6 +1128,7 @@ static void - ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb) - { - free(ct_lb->dsts); -+ free(ct_lb->hash_fields); - } - - static void -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index dc647d7c5..b07e68cfa 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -3108,7 +3108,7 @@ struct ovn_lb { - struct hmap_node hmap_node; - - const struct nbrec_load_balancer *nlb; /* May be NULL. 
*/ -- -+ char *selection_fields; - struct lb_vip *vips; - size_t n_vips; - }; -@@ -3336,6 +3336,15 @@ ovn_lb_create(struct northd_context *ctx, struct hmap *lbs, - n_vips++; - } - -+ if (lb->nlb->n_selection_fields) { -+ struct ds sel_fields = DS_EMPTY_INITIALIZER; -+ for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) { -+ ds_put_format(&sel_fields, "%s,", lb->nlb->selection_fields[i]); -+ } -+ ds_chomp(&sel_fields, ','); -+ lb->selection_fields = ds_steal_cstr(&sel_fields); -+ } -+ - return lb; - } - -@@ -3354,13 +3363,15 @@ ovn_lb_destroy(struct ovn_lb *lb) - free(lb->vips[i].backends); - } - free(lb->vips); -+ free(lb->selection_fields); - } - - static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, -- struct ds *action) -+ struct ds *action, -+ char *selection_fields) - { - if (lb_vip->health_check) { -- ds_put_cstr(action, "ct_lb("); -+ ds_put_cstr(action, "ct_lb(backends="); - - size_t n_active_backends = 0; - for (size_t k = 0; k < lb_vip->n_backends; k++) { -@@ -3384,7 +3395,13 @@ static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, - ds_put_cstr(action, ");"); - } - } else { -- ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips); -+ ds_put_format(action, "ct_lb(backends=%s);", lb_vip->backend_ips); -+ } -+ -+ if (selection_fields && selection_fields[0]) { -+ ds_chomp(action, ';'); -+ ds_chomp(action, ')'); -+ ds_put_format(action, "; hash_fields=\"%s\");", selection_fields); - } - } - -@@ -5660,7 +5677,7 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb *lb) - - /* New connections in Ingress table. 
*/ - struct ds action = DS_EMPTY_INITIALIZER; -- build_lb_vip_ct_lb_actions(lb_vip, &action); -+ build_lb_vip_ct_lb_actions(lb_vip, &action, lb->selection_fields); - - struct ds match = DS_EMPTY_INITIALIZER; - ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, lb_vip->vip); -@@ -9290,7 +9307,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - for (size_t j = 0; j < lb->n_vips; j++) { - struct lb_vip *lb_vip = &lb->vips[j]; - ds_clear(&actions); -- build_lb_vip_ct_lb_actions(lb_vip, &actions); -+ build_lb_vip_ct_lb_actions(lb_vip, &actions, -+ lb->selection_fields); - - if (!sset_contains(&all_ips, lb_vip->vip)) { - sset_add(&all_ips, lb_vip->vip); -diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema -index 949f6258b..359201c26 100644 ---- a/ovn-nb.ovsschema -+++ b/ovn-nb.ovsschema -@@ -1,7 +1,7 @@ - { - "name": "OVN_Northbound", -- "version": "5.22.0", -- "cksum": "170077561 25417", -+ "version": "5.23.0", -+ "cksum": "3367447924 25747", - "tables": { - "NB_Global": { - "columns": { -@@ -179,6 +179,12 @@ - "ip_port_mappings": { - "type": {"key": "string", "value": "string", - "min": 0, "max": "unlimited"}}, -+ "selection_fields": { -+ "type": {"key": {"type": "string", -+ "enum": ["set", -+ ["eth_src", "eth_dst", "ip_src", "ip_dst", -+ "tp_src", "tp_dst"]]}, -+ "min": 0, "max": "unlimited"}}, - "external_ids": { - "type": {"key": "string", "value": "string", - "min": 0, "max": "unlimited"}}}, -diff --git a/ovn-nb.xml b/ovn-nb.xml -index 045c63fb0..55f0ef9f6 100644 ---- a/ovn-nb.xml -+++ b/ovn-nb.xml -@@ -1497,6 +1497,33 @@ -

              -
              - -+ -+

              -+ OVN native load balancers are supported using the OpenFlow groups -+ of type select. OVS supports two selection methods: -+ dp_hash and hash (with optional fields -+ specified) in selecting the buckets of a group. -+ Please see the OVS documentation (man ovs-ofctl) -+ for more details on the selection methods. Each endpoint IP (and port -+ if set) is mapped to a bucket in the group flow. -+

              -+ -+

              -+ CMS can choose the hash selection method by setting the -+ selection fields in this column. ovs-vswitchd uses the -+ specified fields in generating the hash. -+

              -+ -+

              -+ dp_hash selection method uses the assistance of -+ datapath to calculate the hash and it is expected to be -+ faster than hash selection method. So CMS should take -+ this into consideration before using the hash method. -+ Please consult the OVS documentation and OVS sources for the -+ implementation details. -+

              -+
              -+ - - - See External IDs at the beginning of this document. -diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at -index 569390cee..e6a8c04da 100644 ---- a/tests/ovn-northd.at -+++ b/tests/ovn-northd.at -@@ -1119,7 +1119,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1 - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - ]) - - # Delete the Load_Balancer_Health_Check -@@ -1128,7 +1128,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`]) - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - ]) - - # Create the Load_Balancer_Health_Check again. 
-@@ -1141,7 +1141,7 @@ service_monitor | sed '/^$/d' | wc -l`]) - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - ]) - - # Get the uuid of both the service_monitor -@@ -1157,7 +1157,7 @@ OVS_WAIT_UNTIL([ - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) - ]) - - # Set the service monitor for sw0-p1 to offline -@@ -1187,7 +1187,7 @@ OVS_WAIT_UNTIL([ - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - ]) - - # Set the service monitor for sw1-p1 to error -@@ -1199,7 +1199,7 @@ OVS_WAIT_UNTIL([ - ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \ - | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) - ]) - - # Add one more vip 
to lb1 -@@ -1229,8 +1229,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`]) - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000);) - ]) - - # Set the service monitor for sw1-p1 to online -@@ -1242,16 +1242,16 @@ OVS_WAIT_UNTIL([ - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) - ]) - - # Associate lb1 to sw1 - ovn-nbctl --wait=sb ls-lb-add sw1 lb1 - ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), 
action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) - ]) - - # Now create lb2 same as lb1 but udp protocol. -diff --git a/tests/ovn.at b/tests/ovn.at -index 5fb100ad4..ae3b44cb3 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -968,29 +968,42 @@ ct_lb(); - encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat) - has prereqs ip - ct_lb(192.168.1.2:80, 192.168.1.3:80); -+ Syntax error at `192.168.1.2' expecting backends. -+ct_lb(backends=192.168.1.2:80,192.168.1.3:80); - encodes as group:1 - uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) - has prereqs ip --ct_lb(192.168.1.2, 192.168.1.3, ); -- formats as ct_lb(192.168.1.2, 192.168.1.3); -+ct_lb(backends=192.168.1.2, 192.168.1.3, ); -+ formats as ct_lb(backends=192.168.1.2,192.168.1.3); - encodes as group:2 - uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15])) - has prereqs ip --ct_lb(fd0f::2, fd0f::3, ); -- formats as ct_lb(fd0f::2, fd0f::3); -+ct_lb(backends=fd0f::2, fd0f::3, ); -+ formats as ct_lb(backends=fd0f::2,fd0f::3); - encodes as group:3 - uses group: id(3), 
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) - has prereqs ip - --ct_lb(192.168.1.2:); -+ct_lb(backends=192.168.1.2:); - Syntax error at `)' expecting port number. --ct_lb(192.168.1.2:123456); -+ct_lb(backends=192.168.1.2:123456); - Syntax error at `123456' expecting port number. --ct_lb(foo); -+ct_lb(backends=foo); - Syntax error at `foo' expecting IP address. --ct_lb([192.168.1.2]); -+ct_lb(backends=[192.168.1.2]); - Syntax error at `192.168.1.2' expecting IPv6 address. - -+ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields=eth_src,eth_dst,ip_src); -+ Syntax error at `eth_src' invalid hash_fields. -+ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields="eth_src,eth_dst,ip_src"); -+ encodes as group:4 -+ uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) -+ has prereqs ip -+ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst"); -+ encodes as group:5 -+ uses group: id(5), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) -+ has prereqs ip -+ - # ct_next - ct_next; - encodes as ct(table=19,zone=NXM_NX_REG13[0..15]) -@@ -1491,13 +1504,13 @@ handle_svc_check(reg0); - # select - reg9[16..31] = select(1=50, 2=100, 3, ); - formats as reg9[16..31] = select(1=50, 2=100, 3=100); -- encodes as group:4 -- uses group: id(4), 
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) -+ encodes as group:6 -+ uses group: id(6), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) - - reg0 = select(1, 2); - formats as reg0 = select(1=100, 2=100); -- encodes as group:5 -- uses group: id(5), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) -+ encodes as group:7 -+ uses group: id(7), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) - - reg0 = select(1=, 2); - Syntax error at `,' expecting weight. -@@ -1513,12 +1526,12 @@ reg0[0..14] = select(1, 2, 3); - cannot use 15-bit field reg0[0..14] for "select", which requires at least 16 bits. 
- - fwd_group(liveness="true", childports="eth0", "lsp1"); -- encodes as group:6 -- uses group: id(6), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) -+ encodes as group:8 -+ uses group: id(8), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) - - fwd_group(childports="eth0", "lsp1"); -- encodes as group:7 -- uses group: id(7), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) -+ encodes as group:9 -+ uses group: id(9), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) - - fwd_group(childports=eth0); - Syntax error at `eth0' expecting logical switch port. 
-@@ -17916,12 +17929,12 @@ service_monitor | sed '/^$/d' | wc -l`]) - - ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - ]) - - ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt - AT_CHECK([cat lflows.txt], [0], [dnl -- table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) -+ table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) - ]) - - # get the svc monitor mac. -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index 117f1e835..9a5ef1ec3 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -1095,15 +1095,15 @@ ovn-nbctl lsp-add bar bar3 \ - -- lsp-set-addresses bar3 "f0:00:0f:01:02:05 172.16.1.4" - - # Config OVN load-balancer with a VIP. --uuid=`ovn-nbctl create load_balancer vips:30.0.0.1="172.16.1.2,172.16.1.3,172.16.1.4"` --ovn-nbctl set logical_switch foo load_balancer=$uuid -+ovn-nbctl lb-add lb1 30.0.0.1 "172.16.1.2,172.16.1.3,172.16.1.4" -+ovn-nbctl ls-lb-add foo lb1 - - # Create another load-balancer with another VIP. --uuid=`ovn-nbctl create load_balancer vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` --ovn-nbctl add logical_switch foo load_balancer $uuid -+lb2_uuid=`ovn-nbctl create load_balancer name=lb2 vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` -+ovn-nbctl ls-lb-add foo lb2 - - # Config OVN load-balancer with another VIP (this time with ports). 
--ovn-nbctl set load_balancer $uuid vips:'"30.0.0.2:8000"'='"172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' -+ovn-nbctl set load_balancer $lb2_uuid vips:'"30.0.0.2:8000"'='"172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' - - # Wait for ovn-controller to catch up. - ovn-nbctl --wait=hv sync -@@ -1157,6 +1157,82 @@ tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(s - tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) - ]) - -+# Configure selection_fields. -+ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" -+OVS_WAIT_UNTIL([ -+ test $(ovs-ofctl dump-groups br-int | \ -+ grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" -c) -eq 2 -+]) -+ -+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) -+ -+dnl Test load-balancing that includes L4 ports in NAT. -+for i in `seq 1 20`; do -+ echo Request $i -+ NS_CHECK_EXEC([foo1], [wget 30.0.0.2:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) -+done -+ -+dnl Each server should have at least one connection. -+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ -+sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl -+tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) -+tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) -+tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) -+]) -+ -+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) -+ -+echo "foo" > foo -+for i in `seq 1 20`; do -+ echo Request $i -+ ip netns exec foo1 nc -p 30000 30.0.0.2 8000 < foo -+done -+ -+dnl Only one backend should be chosen. 
-+AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 -c) -eq 1]) -+ -+ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src" -+OVS_WAIT_UNTIL([ -+ test $(ovs-ofctl dump-groups br-int | \ -+ grep "selection_method=hash,fields=ip_src" -c) -eq 2 -+]) -+ -+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) -+for i in `seq 1 20`; do -+ echo Request $i -+ ip netns exec foo1 nc 30.0.0.2 8000 < foo -+done -+ -+dnl Only one backend should be chosen as eth_src and ip_src is fixed. -+bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.2 -c) -+bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.3 -c) -+bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.4 -c) -+ -+AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1 -c) -ne 0]) -+ -+if [[ "$bar1_ct" == "20" ]]; then -+ AT_CHECK([test $bar1_ct -eq 20]) -+ AT_CHECK([test $bar2_ct -eq 0]) -+ AT_CHECK([test $bar3_ct -eq 0]) -+else -+ AT_CHECK([test $bar1_ct -eq 0]) -+fi -+ -+if [[ "$bar2_ct" == "20" ]]; then -+ AT_CHECK([test $bar1_ct -eq 20]) -+ AT_CHECK([test $bar2_ct -eq 0]) -+ AT_CHECK([test $bar3_ct -eq 0]) -+else -+ AT_CHECK([test $bar2_ct -eq 0]) -+fi -+ -+if [[ "$bar3_ct" == "20" ]]; then -+ AT_CHECK([test $bar1_ct -eq 20]) -+ AT_CHECK([test $bar2_ct -eq 0]) -+ AT_CHECK([test $bar3_ct -eq 0]) -+else -+ AT_CHECK([test $bar3_ct -eq 0]) -+fi - - OVS_APP_EXIT_AND_WAIT([ovn-controller]) - -@@ -1246,11 +1322,11 @@ uuid=`ovn-nbctl create load_balancer vips:\"fd03::1\"=\"fd02::2,fd02::3,fd02::4 - ovn-nbctl set logical_switch foo load_balancer=$uuid - - # Create another load-balancer with another VIP. 
--uuid=`ovn-nbctl create load_balancer vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` --ovn-nbctl add logical_switch foo load_balancer $uuid -+lb2_uuid=`ovn-nbctl create load_balancer vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` -+ovn-nbctl add logical_switch foo load_balancer $lb2_uuid - - # Config OVN load-balancer with another VIP (this time with ports). --ovn-nbctl set load_balancer $uuid vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@:>@:80,@<:@fd02::4@:>@:80"' -+ovn-nbctl set load_balancer $lb2_uuid vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@:>@:80,@<:@fd02::4@:>@:80"' - - # Wait for ovn-controller to catch up. - ovn-nbctl --wait=hv sync -@@ -1304,7 +1380,83 @@ tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd - tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) - ]) - -+# Configure selection_fields. -+ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" -+OVS_WAIT_UNTIL([ -+ test $(ovs-ofctl dump-groups br-int | \ -+ grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" -c) -eq 2 -+]) -+ -+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) -+ -+dnl Test load-balancing that includes L4 ports in NAT. -+for i in `seq 1 20`; do -+ echo Request $i -+ NS_CHECK_EXEC([foo1], [wget http://[[fd03::2]]:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) -+done -+ -+dnl Each server should have at least one connection. 
-+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep -v fe80 | \ -+sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl -+tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) -+tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) -+tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) -+]) -+ -+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) -+ -+echo "foo" > foo -+for i in `seq 1 20`; do -+ echo Request $i -+ ip netns exec foo1 nc -6 -p 30000 fd03::2 8000 < foo -+done -+ -+# Only one backend should be chosen. Since the source port is fixed, -+# there should be only one conntrack entry. -+AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 -c) -eq 1]) -+ -+ovn-nbctl set load_balancer $lb2_uuid selection_fields="eth_src,ip_src" -+OVS_WAIT_UNTIL([ -+ test $(ovs-ofctl dump-groups br-int | \ -+ grep "selection_method=hash,fields(eth_src,ip_src)" -c) -eq 2 -+]) -+ -+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) -+for i in `seq 1 20`; do -+ echo Request $i -+ ip netns exec foo1 nc -6 fd03::2 8000 < foo -+done - -+dnl Only one backend should be chosen as eth_src and ip_src is fixed. 
-+bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02::2 -c) -+bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep fd02::3 -c) -+bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep fd02::4 -c) -+ -+AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02 -c) -ne 0]) -+ -+if [[ "$bar1_ct" == "20" ]]; then -+ AT_CHECK([test $bar1_ct -eq 20]) -+ AT_CHECK([test $bar2_ct -eq 0]) -+ AT_CHECK([test $bar3_ct -eq 0]) -+else -+ AT_CHECK([test $bar1_ct -eq 0]) -+fi -+ -+if [[ "$bar2_ct" == "20" ]]; then -+ AT_CHECK([test $bar1_ct -eq 20]) -+ AT_CHECK([test $bar2_ct -eq 0]) -+ AT_CHECK([test $bar3_ct -eq 0]) -+else -+ AT_CHECK([test $bar2_ct -eq 0]) -+fi -+ -+if [[ "$bar3_ct" == "20" ]]; then -+ AT_CHECK([test $bar1_ct -eq 20]) -+ AT_CHECK([test $bar2_ct -eq 0]) -+ AT_CHECK([test $bar3_ct -eq 0]) -+else -+ AT_CHECK([test $bar3_ct -eq 0]) -+fi - OVS_APP_EXIT_AND_WAIT([ovn-controller]) - - as ovn-sb -@@ -3448,7 +3600,7 @@ service_monitor | sed '/^$/d' | grep online | wc -l`]) - - OVS_WAIT_UNTIL( - [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep "ip4.dst == 10.0.0.10" > lflows.txt -- test 1 = `cat lflows.txt | grep "ct_lb(10.0.0.3:80,20.0.0.3:80)" | wc -l`] -+ test 1 = `cat lflows.txt | grep "ct_lb(backends=10.0.0.3:80,20.0.0.3:80)" | wc -l`] - ) - - # From sw0-p2 send traffic to vip - 10.0.0.10 -@@ -3474,7 +3626,7 @@ service_monitor logical_port=sw0-p1 | sed '/^$/d' | grep offline | wc -l`]) - - OVS_WAIT_UNTIL( - [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep "ip4.dst == 10.0.0.10" > lflows.txt -- test 1 = `cat lflows.txt | grep "ct_lb(20.0.0.3:80)" | wc -l`] -+ test 1 = `cat lflows.txt | grep "ct_lb(backends=20.0.0.3:80)" | wc -l`] - ) - - ovs-appctl dpctl/flush-conntrack --- -2.26.2 - diff --git a/SOURCES/0003-controller-fix-ip-buffering-with-static-routes.patch b/SOURCES/0003-controller-fix-ip-buffering-with-static-routes.patch deleted file mode 100644 index 9f3209d..0000000 --- 
a/SOURCES/0003-controller-fix-ip-buffering-with-static-routes.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 754d5581fa9d5de97f7c2acf8c2900e105f588c9 Mon Sep 17 00:00:00 2001 -Message-Id: <754d5581fa9d5de97f7c2acf8c2900e105f588c9.1590585469.git.lorenzo.bianconi@redhat.com> -In-Reply-To: -References: -From: Lorenzo Bianconi -Date: Wed, 20 May 2020 22:01:16 +0200 -Subject: [PATCH ovn 2/3] controller: fix ip buffering with static routes - -When the arp request is sent to a gw router and not to the final -destination of the packet buffered_packets_map needs to be updated using -next-hop ip address and not the destionation one. - -Fixes: 2e5cdb4b1392 ("OVN: add buffering support for ip packets") -Signed-off-by: Lorenzo Bianconi ---- - controller/pinctrl.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - ---- a/controller/pinctrl.c -+++ b/controller/pinctrl.c -@@ -1378,8 +1378,7 @@ pinctrl_find_buffered_packets(const stru - - /* Called with in the pinctrl_handler thread context. */ - static int --pinctrl_handle_buffered_packets(const struct flow *ip_flow, -- struct dp_packet *pkt_in, -+pinctrl_handle_buffered_packets(struct dp_packet *pkt_in, - const struct match *md, bool is_arp) - OVS_REQUIRES(pinctrl_mutex) - { -@@ -1388,9 +1387,10 @@ pinctrl_handle_buffered_packets(const st - struct in6_addr addr; - - if (is_arp) { -- addr = in6_addr_mapped_ipv4(ip_flow->nw_dst); -+ addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); - } else { -- addr = ip_flow->ipv6_dst; -+ ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0)); -+ memcpy(&addr, &ip6, sizeof addr); - } - - uint32_t hash = hash_bytes(&addr, sizeof addr, 0); -@@ -1431,7 +1431,7 @@ pinctrl_handle_arp(struct rconn *swconn, - } - - ovs_mutex_lock(&pinctrl_mutex); -- pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, true); -+ pinctrl_handle_buffered_packets(pkt_in, md, true); - ovs_mutex_unlock(&pinctrl_mutex); - - /* Compose an ARP packet. 
*/ -@@ -5278,7 +5278,7 @@ pinctrl_handle_nd_ns(struct rconn *swcon - } - - ovs_mutex_lock(&pinctrl_mutex); -- pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, false); -+ pinctrl_handle_buffered_packets(pkt_in, md, false); - ovs_mutex_unlock(&pinctrl_mutex); - - uint64_t packet_stub[128 / 8]; diff --git a/SOURCES/0003-logical-fields-fix-memory-leak-caused-by-initialize-.patch b/SOURCES/0003-logical-fields-fix-memory-leak-caused-by-initialize-.patch deleted file mode 100644 index 26e8773..0000000 --- a/SOURCES/0003-logical-fields-fix-memory-leak-caused-by-initialize-.patch +++ /dev/null @@ -1,120 +0,0 @@ -From e6b687cc23212e0c007d9f69dfa89536c8f92306 Mon Sep 17 00:00:00 2001 -From: Damijan Skvarc -Date: Thu, 5 Mar 2020 07:21:41 +0100 -Subject: [PATCH 3/3] logical-fields: fix memory leak caused by initialize - ovnfield_by_name twice - -ovnfield_by_name is hash of strings which is used to quickly find -field by name. This hash is initialized from ovn_init_symtab(). In case -the latter function is called multiple times then also ovnfield_by_name is -initialized multiple times but without freeing previously allocated -memory resources what cause memory leaks. This actually happens in -ovn-controller which calls ovn_init_symtab() function twice, once from -ofctrl.c and the other time from lflow.c files. - -Problem was solved by initializing ovnfield_by_name entity only once -and using design pattern from stopwatch.c or meta_flow.c files (ovs). 
- -Problem was reported by valgrind with flood of messages (190) while executing -ovn test suite: - - ==5999== 47 (32 direct, 15 indirect) bytes in 1 blocks are definitely lost in loss record 86 of 102 - ==5999== at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) - ==5999== by 0x50635D: xmalloc (util.c:138) - ==5999== by 0x4F6513: shash_add_nocopy__ (shash.c:109) - ==5999== by 0x4F6585: shash_add_nocopy (shash.c:121) - ==5999== by 0x4F65BD: shash_add (shash.c:129) - ==5999== by 0x4F6602: shash_add_once (shash.c:136) - ==5999== by 0x4395B7: ovn_init_symtab (logical-fields.c:261) - ==5999== by 0x406C91: main (ovn-controller.c:1750) - -Signed-off-by: Damijan Skvarc -Signed-off-by: Ben Pfaff ---- - controller/lflow.c | 1 - - include/ovn/logical-fields.h | 1 - - lib/logical-fields.c | 39 +++++++++++++++++++++++++----------- - 3 files changed, 27 insertions(+), 14 deletions(-) - -diff --git a/controller/lflow.c b/controller/lflow.c -index ee11fc617..01214a3a6 100644 ---- a/controller/lflow.c -+++ b/controller/lflow.c -@@ -846,5 +846,4 @@ lflow_destroy(void) - { - expr_symtab_destroy(&symtab); - shash_destroy(&symtab); -- ovn_destroy_ovnfields(); - } -diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h -index 9b7c34fb7..c7bd2dba9 100644 ---- a/include/ovn/logical-fields.h -+++ b/include/ovn/logical-fields.h -@@ -130,5 +130,4 @@ ovn_field_from_id(enum ovn_field_id id) - const char *event_to_string(enum ovn_controller_event event); - int string_to_event(const char *s); - const struct ovn_field *ovn_field_from_name(const char *name); --void ovn_destroy_ovnfields(void); - #endif /* ovn/lib/logical-fields.h */ -diff --git a/lib/logical-fields.c b/lib/logical-fields.c -index 25ace5840..a007085b3 100644 ---- a/lib/logical-fields.c -+++ b/lib/logical-fields.c -@@ -254,12 +254,6 @@ ovn_init_symtab(struct shash *symtab) - expr_symtab_add_field(symtab, "sctp.src", MFF_SCTP_SRC, "sctp", false); - expr_symtab_add_field(symtab, 
"sctp.dst", MFF_SCTP_DST, "sctp", false); - -- shash_init(&ovnfield_by_name); -- for (int i = 0; i < OVN_FIELD_N_IDS; i++) { -- const struct ovn_field *of = &ovn_fields[i]; -- ovs_assert(of->id == i); /* Fields must be in the enum order. */ -- shash_add_once(&ovnfield_by_name, of->name, of); -- } - expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU); - } - -@@ -284,14 +278,35 @@ string_to_event(const char *s) - return -1; - } - --const struct ovn_field * --ovn_field_from_name(const char *name) -+static void -+ovn_destroy_ovnfields(void) - { -- return shash_find_data(&ovnfield_by_name, name); -+ shash_destroy(&ovnfield_by_name); - } - --void --ovn_destroy_ovnfields(void) -+static void -+ovn_do_init_ovnfields(void) - { -- shash_destroy(&ovnfield_by_name); -+ shash_init(&ovnfield_by_name); -+ for (int i = 0; i < OVN_FIELD_N_IDS; i++) { -+ const struct ovn_field *of = &ovn_fields[i]; -+ ovs_assert(of->id == i); /* Fields must be in the enum order. */ -+ shash_add_once(&ovnfield_by_name, of->name, of); -+ } -+ atexit(ovn_destroy_ovnfields); -+} -+ -+static void -+ovn_init_ovnfields(void) -+{ -+ static pthread_once_t once = PTHREAD_ONCE_INIT; -+ pthread_once(&once, ovn_do_init_ovnfields); -+} -+ -+const struct ovn_field * -+ovn_field_from_name(const char *name) -+{ -+ ovn_init_ovnfields(); -+ -+ return shash_find_data(&ovnfield_by_name, name); - } --- -2.24.1 - diff --git a/SOURCES/0003-northd-Add-logical-flows-for-dhcpv6-pfd-parsing.patch b/SOURCES/0003-northd-Add-logical-flows-for-dhcpv6-pfd-parsing.patch deleted file mode 100644 index 79d5a4f..0000000 --- a/SOURCES/0003-northd-Add-logical-flows-for-dhcpv6-pfd-parsing.patch +++ /dev/null @@ -1,449 +0,0 @@ -From 643d4be1b3f40c3075af584d0cbc83e34a5e51ca Mon Sep 17 00:00:00 2001 -Message-Id: <643d4be1b3f40c3075af584d0cbc83e34a5e51ca.1586727203.git.lorenzo.bianconi@redhat.com> -In-Reply-To: <2e84aada0b45d2f8739c2fdbc351098fc1c09c26.1586727203.git.lorenzo.bianconi@redhat.com> -References: 
<2e84aada0b45d2f8739c2fdbc351098fc1c09c26.1586727203.git.lorenzo.bianconi@redhat.com> -From: Lorenzo Bianconi -Date: Wed, 1 Apr 2020 18:37:31 +0200 -Subject: [PATCH 3/3] northd: Add logical flows for dhcpv6 pfd parsing - -Introduce logical flows in ovn router pipeline in order to parse dhcpv6 -advertise/reply from IPv6 prefix delegation router. -Do not overwrite ipv6_ra_pd_list info in options column of SB port_binding -table written by ovn-controller -Introduce ipv6_prefix column in NB Logical_router_port table to report -IPv6 prefix received from delegation router to the CMS - -Change-Id: Ibc1bd83bf3d9d4671f70610df5635e6266580a18 -Signed-off-by: Lorenzo Bianconi -Signed-off-by: Numan Siddique ---- - NEWS | 1 + - northd/ovn-northd.8.xml | 8 +++ - northd/ovn-northd.c | 95 ++++++++++++++++++++++++-- - ovn-nb.ovsschema | 7 +- - ovn-nb.xml | 21 ++++++ - tests/atlocal.in | 5 +- - tests/system-ovn.at | 143 ++++++++++++++++++++++++++++++++++++++++ - 7 files changed, 272 insertions(+), 8 deletions(-) - -diff --git a/NEWS b/NEWS -index 9b36bfd17..21c80f0dc 100644 ---- a/NEWS -+++ b/NEWS -@@ -7,6 +7,7 @@ OVN v20.03.0 - 28 Feb 2020 - - Added Forwarding Group support in OVN. - - Added support for MLD Snooping and MLD Querier. - - Added support for ECMP routes in OVN router. -+ - Added IPv6 Prefix Delegation support in OVN. - - - OVN Interconnection: - * Support for L3 interconnection of multiple OVN deployments with tunnels -diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml -index b5e4d6d84..82c86f636 100644 ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -1660,6 +1660,14 @@ next; -
            -
          • - -+
          • -+ A priority-100 flow parses DHCPv6 replies from IPv6 prefix -+ delegation routers (udp.src == 547 && -+ udp.dst == 546). The handle_dhcpv6_reply -+ is used to send IPv6 prefix delegation messages to the delegation -+ router. -+
          • -+ -
          • -

            - A priority-87 flow explicitly allows IPv6 multicast traffic that is -diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c -index bb68b8fe9..fd1be5b27 100644 ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -2688,6 +2688,33 @@ op_get_name(const struct ovn_port *op) - return name; - } - -+static void -+ovn_update_ipv6_prefix(struct hmap *ports) -+{ -+ const struct ovn_port *op; -+ HMAP_FOR_EACH (op, key_node, ports) { -+ if (!op->nbrp) { -+ continue; -+ } -+ -+ char prefix[IPV6_SCAN_LEN + 6]; -+ unsigned aid; -+ const char *ipv6_pd_list = smap_get(&op->sb->options, -+ "ipv6_ra_pd_list"); -+ if (!ipv6_pd_list || -+ !ovs_scan(ipv6_pd_list, "%u:%s", &aid, prefix)) { -+ continue; -+ } -+ -+ struct sset ipv6_prefix_set = SSET_INITIALIZER(&ipv6_prefix_set); -+ sset_add(&ipv6_prefix_set, prefix); -+ nbrec_logical_router_port_set_ipv6_prefix(op->nbrp, -+ sset_array(&ipv6_prefix_set), -+ sset_count(&ipv6_prefix_set)); -+ sset_destroy(&ipv6_prefix_set); -+ } -+} -+ - static void - ovn_port_update_sbrec(struct northd_context *ctx, - struct ovsdb_idl_index *sbrec_chassis_by_name, -@@ -2818,6 +2845,13 @@ ovn_port_update_sbrec(struct northd_context *ctx, - smap_add(&new, "l3gateway-chassis", chassis_name); - } - } -+ -+ const char *ipv6_pd_list = smap_get(&op->sb->options, -+ "ipv6_ra_pd_list"); -+ if (ipv6_pd_list) { -+ smap_add(&new, "ipv6_ra_pd_list", ipv6_pd_list); -+ } -+ - sbrec_port_binding_set_options(op->sb, &new); - smap_destroy(&new); - -@@ -4683,12 +4717,12 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) - * unreachable packets. 
*/ - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, - "nd || nd_rs || nd_ra || icmp4.type == 3 || " -- "icmp6.type == 1 || (tcp && tcp.flags == 20)", -- "next;"); -+ "icmp6.type == 1 || (tcp && tcp.flags == 20) || " -+ "(udp && udp.src == 546 && udp.dst == 547)", "next;"); - ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, - "nd || nd_rs || nd_ra || icmp4.type == 3 || " -- "icmp6.type == 1 || (tcp && tcp.flags == 20)", -- "next;"); -+ "icmp6.type == 1 || (tcp && tcp.flags == 20) ||" -+ "(udp && udp.src == 546 && udp.dst == 547)", "next;"); - - /* Ingress and Egress Pre-ACL Table (Priority 100). - * -@@ -7744,6 +7778,11 @@ copy_ra_to_sb(struct ovn_port *op, const char *address_mode) - } - ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen); - } -+ -+ const char *ra_pd_list = smap_get(&op->sb->options, "ipv6_ra_pd_list"); -+ if (ra_pd_list) { -+ ds_put_cstr(&s, ra_pd_list); -+ } - /* Remove trailing space */ - ds_chomp(&s, ' '); - smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s)); -@@ -8488,7 +8527,34 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - free(snat_ips); - } - -- /* Logical router ingress table 3: IP Input for IPv6. */ -+ /* DHCPv6 reply handling */ -+ HMAP_FOR_EACH (op, key_node, ports) { -+ if (!op->nbrp) { -+ continue; -+ } -+ -+ if (op->derived) { -+ continue; -+ } -+ -+ struct lport_addresses lrp_networks; -+ if (!extract_lrp_networks(op->nbrp, &lrp_networks)) { -+ continue; -+ } -+ -+ for (size_t i = 0; i < lrp_networks.n_ipv6_addrs; i++) { -+ ds_clear(&actions); -+ ds_clear(&match); -+ ds_put_format(&match, "ip6.dst == %s && udp.src == 547 &&" -+ " udp.dst == 546", -+ lrp_networks.ipv6_addrs[i].addr_s); -+ ds_put_format(&actions, "reg0 = 0; handle_dhcpv6_reply;"); -+ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, -+ ds_cstr(&match), ds_cstr(&actions)); -+ } -+ } -+ -+ /* Logical router ingress table 1: IP Input for IPv6. 
*/ - HMAP_FOR_EACH (op, key_node, ports) { - if (!op->nbrp) { - continue; -@@ -9250,6 +9316,24 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, - continue; - } - -+ struct smap options; -+ /* enable IPv6 prefix delegation */ -+ bool prefix_delegation = smap_get_bool(&op->nbrp->options, -+ "prefix_delegation", false); -+ if (prefix_delegation) { -+ smap_clone(&options, &op->sb->options); -+ smap_add(&options, "ipv6_prefix_delegation", "true"); -+ sbrec_port_binding_set_options(op->sb, &options); -+ smap_destroy(&options); -+ } -+ -+ if (smap_get_bool(&op->nbrp->options, "prefix", false)) { -+ smap_clone(&options, &op->sb->options); -+ smap_add(&options, "ipv6_prefix", "true"); -+ sbrec_port_binding_set_options(op->sb, &options); -+ smap_destroy(&options); -+ } -+ - const char *address_mode = smap_get( - &op->nbrp->ipv6_ra_configs, "address_mode"); - -@@ -10941,6 +11025,7 @@ ovnnb_db_run(struct northd_context *ctx, - build_meter_groups(ctx, &meter_groups); - build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, - &igmp_groups, &meter_groups, &lbs); -+ ovn_update_ipv6_prefix(ports); - - sync_address_sets(ctx); - sync_port_groups(ctx); -diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema -index ea6f4e354..949f6258b 100644 ---- a/ovn-nb.ovsschema -+++ b/ovn-nb.ovsschema -@@ -1,7 +1,7 @@ - { - "name": "OVN_Northbound", -- "version": "5.20.1", -- "cksum": "721375950 25251", -+ "version": "5.22.0", -+ "cksum": "170077561 25417", - "tables": { - "NB_Global": { - "columns": { -@@ -342,6 +342,9 @@ - "ipv6_ra_configs": { - "type": {"key": "string", "value": "string", - "min": 0, "max": "unlimited"}}, -+ "ipv6_prefix": {"type": {"key": "string", -+ "min": 0, -+ "max": "unlimited"}}, - "external_ids": { - "type": {"key": "string", "value": "string", - "min": 0, "max": "unlimited"}}}, -diff --git a/ovn-nb.xml b/ovn-nb.xml -index f7ba9c334..045c63fb0 100644 ---- a/ovn-nb.xml -+++ b/ovn-nb.xml -@@ -2065,6 +2065,11 @@ - port has all ingress and egress 
traffic dropped. - - -+ -+ This column contains IPv6 prefix obtained by prefix delegation -+ router according to RFC 3633 -+ -+ - -

            - This column defines the IPv6 ND RA address mode and ND MTU Option to be -@@ -2320,6 +2325,22 @@ - ovn-northd honors the configured value. - - -+ -+

            -+ If set to true, enable IPv6 prefix delegation state -+ machine on this logical router port (RFC3633). IPv6 prefix -+ delegation is available just on a gateway router or on a gateway -+ router port. -+

            -+ -+ -+ -+

            -+ If set to true, this interface will receive an IPv6 -+ prefix according to RFC3663 -+

            -+
            - - - -diff --git a/tests/atlocal.in b/tests/atlocal.in -index 5f14c3da0..8f3ff03b9 100644 ---- a/tests/atlocal.in -+++ b/tests/atlocal.in -@@ -157,7 +157,7 @@ find_command() - { - which $1 > /dev/null 2>&1 - status=$? -- var=HAVE_`echo "$1" | tr '[a-z]' '[A-Z]'` -+ var=HAVE_`echo "$1" | tr '-' '_' | tr '[a-z]' '[A-Z]'` - if test "$status" = "0"; then - eval ${var}="yes" - else -@@ -192,6 +192,9 @@ else - DIFF_SUPPORTS_NORMAL_FORMAT=no - fi - -+# Set HAVE_DIBBLER-SERVER -+find_command dibbler-server -+ - # Turn off proxies. - unset http_proxy - unset https_proxy -diff --git a/tests/system-ovn.at b/tests/system-ovn.at -index f1ae69b20..000b3f13b 100644 ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -3793,3 +3793,146 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d - /connection dropped.*/d"]) - - AT_CLEANUP -+ -+AT_SETUP([ovn -- IPv6 prefix delegation]) -+AT_SKIP_IF([test $HAVE_DIBBLER_SERVER = no]) -+AT_SKIP_IF([test $HAVE_TCPDUMP = no]) -+AT_KEYWORDS([ovn-ipv6-prefix_d]) -+ -+ovn_start -+OVS_TRAFFIC_VSWITCHD_START() -+ -+ADD_BR([br-int]) -+ADD_BR([br-ext]) -+ -+ovs-ofctl add-flow br-ext action=normal -+# Set external-ids in br-int needed for ovn-controller -+ovs-vsctl \ -+ -- set Open_vSwitch . external-ids:system-id=hv1 \ -+ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ -+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ -+ -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ -+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true -+ -+# Start ovn-controller -+start_daemon ovn-controller -+ -+ovn-nbctl lr-add R1 -+ -+ovn-nbctl ls-add sw0 -+ovn-nbctl ls-add sw1 -+ovn-nbctl ls-add public -+ -+ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 -+ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 -+ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24 \ -+ -- set Logical_Router_Port rp-public options:redirect-chassis=hv1 -+ -+ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ -+ type=router options:router-port=rp-sw0 \ -+ -- lsp-set-addresses sw0-rp router -+ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ -+ type=router options:router-port=rp-sw1 \ -+ -- lsp-set-addresses sw1-rp router -+ -+ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \ -+ type=router options:router-port=rp-public \ -+ -- lsp-set-addresses public-rp router -+ -+ADD_NAMESPACES(sw01) -+ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ -+ "192.168.1.1") -+ovn-nbctl lsp-add sw0 sw01 \ -+ -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2" -+ -+ADD_NAMESPACES(sw11) -+ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \ -+ "192.168.2.1") -+ovn-nbctl lsp-add sw1 sw11 \ -+ -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2" -+ -+ADD_NAMESPACES(server) -+ADD_VETH(s1, server, br-ext, "2001:1db8:3333::2/64", "f0:00:00:01:02:05", \ -+ "2001:1db8:3333::1") -+ -+OVS_WAIT_UNTIL([test "$(ip netns exec server ip a | grep 2001:1db8:3333::2 | grep tentative)" = ""]) -+OVS_WAIT_UNTIL([test "$(ip netns exec server ip a | grep fe80 | grep tentative)" = ""]) -+ -+AT_CHECK([ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext]) -+ovn-nbctl lsp-add public public1 \ -+ -- lsp-set-addresses public1 unknown \ -+ -- lsp-set-type public1 localnet \ -+ -- lsp-set-options public1 network_name=phynet -+ -+ovn-nbctl set logical_router_port rp-public options:prefix_delegation=true -+ovn-nbctl set logical_router_port rp-public options:prefix=true -+ovn-nbctl set logical_router_port rp-sw0 options:prefix=true -+ovn-nbctl set logical_router_port rp-sw1 options:prefix=true -+ -+# reset dibbler state -+sed s/^iface.*/"iface \"s1\" {"/g -i /etc/dibbler/server.conf -+sed s/pd-pool.*/"pd-pool 2001:1db8:3333::\/80"/g -i /etc/dibbler/server.conf -+sed s/t1.*/"t1 10"/g -i /etc/dibbler/server.conf -+sed s/t2.*/"t2 15"/g -i /etc/dibbler/server.conf -+cat > /var/lib/dibbler/server-AddrMgr.xml < -+ 1575481348 -+ 0 -+ -+EOF -+cat > /var/lib/dibbler/server-CfgMgr.xml < -+ /var/lib/dibbler -+ Server -+ 8 -+ 0 -+ 0 -+ -+EOF -+ -+NS_CHECK_EXEC([server], [dibbler-server run > dibbler.log &]) -+ovn-nbctl --wait=hv sync -+ -+OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-public ipv6_prefix | cut -c4-15)" = ""]) -+OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut -c4-15)" = ""]) -+OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw1 ipv6_prefix | cut -c4-15)" = ""]) -+ -+AT_CHECK([ovn-nbctl get logical_router_port rp-public ipv6_prefix | cut -c3-16], [0], [dnl -+[2001:1db8:3333] -+]) -+AT_CHECK([ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut -c3-16], [0], [dnl -+[2001:1db8:3333] -+]) -+AT_CHECK([ovn-nbctl get logical_router_port rp-sw1 ipv6_prefix | cut -c3-16], [0], [dnl -+[2001:1db8:3333] -+]) -+ -+kill $(pidof dibbler-server) -+ -+prefix=$(ovn-nbctl list logical_router_port rp-public | awk -F/ '/ipv6_prefix/{print substr($1,25,9)}' | sed 's/://g') -+ovn-nbctl set logical_router_port rp-sw0 options:prefix=false -+ovn-nbctl set logical_router_port rp-sw1 options:prefix=false -+ -+NS_CHECK_EXEC([server], 
[tcpdump -c 1 -nni s1 ip6[[95:4]]=0x${prefix} > public.pcap &]) -+ -+OVS_WAIT_UNTIL([ -+ total_pkts=$(cat public.pcap | wc -l) -+ test "${total_pkts}" = "1" -+]) -+ -+kill $(pidof tcpdump) -+kill $(pidof ovn-controller) -+ -+as ovn-sb -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+ -+as ovn-nb -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+ -+as northd -+OVS_APP_EXIT_AND_WAIT([ovn-northd]) -+ -+as -+OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d -+/.*terminating with signal 15.*/d"]) -+AT_CLEANUP --- -2.25.2 - diff --git a/SOURCES/0004-northd-manage-ARP-request-locally-for-FIP-traffic.patch b/SOURCES/0004-northd-manage-ARP-request-locally-for-FIP-traffic.patch deleted file mode 100644 index de2a192..0000000 --- a/SOURCES/0004-northd-manage-ARP-request-locally-for-FIP-traffic.patch +++ /dev/null @@ -1,187 +0,0 @@ -From 92f6a2f668708c677a8b10b0ac861bfd712f6a20 Mon Sep 17 00:00:00 2001 -Message-Id: <92f6a2f668708c677a8b10b0ac861bfd712f6a20.1590585469.git.lorenzo.bianconi@redhat.com> -In-Reply-To: -References: -From: Lorenzo Bianconi -Date: Mon, 25 May 2020 18:31:27 +0200 -Subject: [PATCH ovn 3/3] northd: manage ARP request locally for FIP traffic - -Modify 100-priority logical flows in Gateway Redirect table of -logical router ingress pipeline (table 15) in order to manage ARP -request locally for FIP traffic. 
In particular set reg1 and eth.src -to NAT external ip and NAT external mac respectively and do not -distribute ARP traffic using FIP - -Signed-off-by: Lorenzo Bianconi ---- - northd/ovn-northd.8.xml | 10 +++++++--- - northd/ovn-northd.c | 23 ++++++++++++++++------- - tests/ovn.at | 28 +++++++++++++++++++++++++--- - tests/system-ovn.at | 30 ++++++++++++++++++++++++++++++ - 4 files changed, 78 insertions(+), 13 deletions(-) - ---- a/northd/ovn-northd.8.xml -+++ b/northd/ovn-northd.8.xml -@@ -2879,9 +2879,13 @@ icmp4 { - For each NAT rule in the OVN Northbound database that can - be handled in a distributed manner, a priority-100 logical - flow with match ip4.src == B && -- outport == GW, where GW is -- the logical router distributed gateway port, with actions -- next;. -+ outport == GW && -+ is_chassis_resident(P), where GW is -+ the logical router distributed gateway port and P -+ is the NAT logical port. IP traffic matching the above rule -+ will be managed locally setting reg1 to C -+ and eth.src to D, where C is NAT -+ external ip and D is NAT external mac. -
          • - -
          • ---- a/northd/ovn-northd.c -+++ b/northd/ovn-northd.c -@@ -9137,16 +9137,25 @@ build_lrouter_flows(struct hmap *datapat - /* Ingress Gateway Redirect Table: For NAT on a distributed - * router, add flows that are specific to a NAT rule. These - * flows indicate the presence of an applicable NAT rule that -- * can be applied in a distributed manner. */ -+ * can be applied in a distributed manner. -+ * In particulr reg1 and eth.src are set to NAT external IP and -+ * NAT external mac so the ARP request generated in the following -+ * stage is sent out with proper IP/MAC src addresses -+ */ - if (distributed) { - ds_clear(&match); -- ds_put_format(&match, "ip%s.src == %s && outport == %s", -- is_v6 ? "6" : "4", -- nat->logical_ip, -- od->l3dgw_port->json_key); -+ ds_clear(&actions); -+ ds_put_format(&match, -+ "ip%s.src == %s && outport == %s && " -+ "is_chassis_resident(\"%s\")", -+ is_v6 ? "6" : "4", nat->logical_ip, -+ od->l3dgw_port->json_key, nat->logical_port); -+ ds_put_format(&actions, "eth.src = %s; %sreg1 = %s; next;", -+ nat->external_mac, is_v6 ? "xx" : "", -+ nat->external_ip); - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, -- 100, ds_cstr(&match), "next;", -- &nat->header_); -+ 100, ds_cstr(&match), -+ ds_cstr(&actions), &nat->header_); - } - - /* Egress Loopback table: For NAT on a distributed router. 
---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -14353,9 +14353,14 @@ ovs-vsctl -- add-port br-int hv2-vif1 -- - set interface hv2-vif1 external-ids:iface-id=sw1-p0 \ - options:tx_pcap=hv2/vif1-tx.pcap \ - options:rxq_pcap=hv2/vif1-rx.pcap \ -- ofport-request=1 -+ ofport-request=2 -+ovs-vsctl -- add-port br-int hv2-vif2 -- \ -+ set interface hv2-vif2 external-ids:iface-id=sw0-p1 \ -+ options:tx_pcap=hv2/vif2-tx.pcap \ -+ options:rxq_pcap=hv2/vif2-rx.pcap \ -+ ofport-request=3 - --ovn-nbctl create Logical_Router name=lr0 options:chassis=hv1 -+ovn-nbctl create Logical_Router name=lr0 - ovn-nbctl ls-add sw0 - ovn-nbctl ls-add sw1 - -@@ -14364,13 +14369,16 @@ ovn-nbctl lsp-add sw0 rp-sw0 -- set Logi - type=router options:router-port=sw0 \ - -- lsp-set-addresses rp-sw0 router - --ovn-nbctl lrp-add lr0 sw1 00:00:02:01:02:03 172.16.1.1/24 2002:0:0:0:0:0:0:1/64 -+ovn-nbctl lrp-add lr0 sw1 00:00:02:01:02:03 172.16.1.1/24 2002:0:0:0:0:0:0:1/64 \ -+ -- set Logical_Router_Port sw1 options:redirect-chassis="hv2" - ovn-nbctl lsp-add sw1 rp-sw1 -- set Logical_Switch_Port rp-sw1 \ - type=router options:router-port=sw1 \ - -- lsp-set-addresses rp-sw1 router - - ovn-nbctl lsp-add sw0 sw0-p0 \ - -- lsp-set-addresses sw0-p0 "f0:00:00:01:02:03 192.168.1.2 2001::2" -+ovn-nbctl lsp-add sw0 sw0-p1 \ -+ -- lsp-set-addresses sw0-p1 "f0:00:00:11:02:03 192.168.1.3 2001::3" - - ovn-nbctl lsp-add sw1 sw1-p0 \ - -- lsp-set-addresses sw1-p0 unknown -@@ -14416,6 +14424,20 @@ send_na 2 1 $dst_mac $router_mac1 $dst_i - - OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) - -+# Create FIP on sw0-p0, add a route on logical router pipeline and -+# ARP request for a unkwon destination is sent using FIP MAC/IP -+ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.1.2 192.168.1.3 sw0-p1 f0:00:00:01:02:04 -+ovn-nbctl lr-route-add lr0 172.16.2.0/24 172.16.1.11 -+ -+dst_ip=$(ip_to_hex 172 16 2 10) -+fip_ip=$(ip_to_hex 172 16 1 2) -+src_ip=$(ip_to_hex 192 168 1 3) -+gw_router=$(ip_to_hex 172 16 1 11) -+send_icmp_packet 
2 2 f00000110203 $router_mac0 $src_ip $dst_ip 0000 $data -+echo $(get_arp_req f00000010204 $fip_ip $gw_router) >> expected -+ -+OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) -+ - OVN_CLEANUP([hv1],[hv2]) - AT_CLEANUP - ---- a/tests/system-ovn.at -+++ b/tests/system-ovn.at -@@ -2747,6 +2747,19 @@ ADD_VETH(alice1, alice1, br-int, "172.16 - ovn-nbctl lsp-add alice alice1 \ - -- lsp-set-addresses alice1 "f0:00:00:01:02:05 172.16.1.2" - -+# Add external network -+ADD_NAMESPACES(ext-net) -+AT_CHECK([ip link add alice-ext netns alice1 type veth peer name ext-veth netns ext-net]) -+NS_CHECK_EXEC([ext-net], [ip link set dev ext-veth up], [0], []) -+NS_CHECK_EXEC([ext-net], [ip addr add 10.0.0.1/24 dev ext-veth], [0], []) -+NS_CHECK_EXEC([ext-net], [ip route add default via 10.0.0.2], [0], []) -+ -+NS_CHECK_EXEC([alice1], [ip link set dev alice-ext up], [0], []) -+NS_CHECK_EXEC([alice1], [ip addr add 10.0.0.2/24 dev alice-ext], [0], []) -+NS_CHECK_EXEC([alice1], [sysctl -w net.ipv4.conf.all.forwarding=1],[0], [dnl -+net.ipv4.conf.all.forwarding = 1 -+]) -+ - # Add DNAT rules - AT_CHECK([ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.1.2 foo1 00:00:02:02:03:04]) - AT_CHECK([ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.4 192.168.1.3 foo2 00:00:02:02:03:05]) -@@ -2754,6 +2767,9 @@ AT_CHECK([ovn-nbctl lr-nat-add R1 dnat_a - # Add a SNAT rule - AT_CHECK([ovn-nbctl lr-nat-add R1 snat 172.16.1.1 192.168.0.0/16]) - -+# Add default route to ext-net -+AT_CHECK([ovn-nbctl lr-route-add R1 10.0.0.0/24 172.16.1.2]) -+ - ovn-nbctl --wait=hv sync - OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=172.16.1.1)']) - -@@ -2797,6 +2813,20 @@ sed -e 's/zone=[[0-9]]*/zone=/' - icmp,orig=(src=192.168.2.2,dst=172.16.1.2,id=,type=8,code=0),reply=(src=172.16.1.2,dst=172.16.1.1,id=,type=0,code=0),zone= - ]) - -+# Try to ping external network -+NS_CHECK_EXEC([ext-net], [tcpdump -n -c 3 -i ext-veth dst 172.16.1.3 and icmp > ext-net.pcap &]) -+sleep 1 -+AT_CHECK([ovn-nbctl 
lr-nat-del R1 snat]) -+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.1 | FORMAT_PING], \ -+[0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+ -+OVS_WAIT_UNTIL([ -+ total_pkts=$(cat ext-net.pcap | wc -l) -+ test "${total_pkts}" = "3" -+]) -+ - OVS_APP_EXIT_AND_WAIT([ovn-controller]) - - as ovn-sb diff --git a/SOURCES/0004-ovn-nbctl-Enhance-lr-policy-add-to-set-the-options.patch b/SOURCES/0004-ovn-nbctl-Enhance-lr-policy-add-to-set-the-options.patch new file mode 100644 index 0000000..379c3df --- /dev/null +++ b/SOURCES/0004-ovn-nbctl-Enhance-lr-policy-add-to-set-the-options.patch @@ -0,0 +1,254 @@ +From 025fec42cebe9efc7c6a2d94816b173748e7e4f6 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Tue, 23 Jun 2020 17:07:04 +0530 +Subject: [PATCH 04/22] ovn-nbctl: Enhance lr-policy-add to set the options. + +The commit [1] added a new column - 'options' to Logical_Router_Policy NB DB +table. This patch enhances the lr-policy-add command to set the options +as key=value pairs. + +For nbctl_lr_policy_add(), this patch now returns after ctl_error() as there is no +point continuing further and the comments in the ctl_error() implementation says so. 
+ +[1] - a123ef0fb8fd("Support packet metadata marking for logical router policies.") + +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit 742474bad730fbdc9705b4c2784a2b4acca327cf) + +Change-Id: I64c786ff4c5244b643a57bff270a14d85d5204f1 +--- + tests/ovn-nbctl.at | 15 +++++++++--- + tests/ovn.at | 8 ++----- + utilities/ovn-nbctl.8.xml | 11 ++++++++- + utilities/ovn-nbctl.c | 48 ++++++++++++++++++++++++++++++++++----- + 4 files changed, 66 insertions(+), 16 deletions(-) + +diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at +index dc9d9d76a..6d6608729 100644 +--- a/tests/ovn-nbctl.at ++++ b/tests/ovn-nbctl.at +@@ -1590,11 +1590,20 @@ AT_CHECK([ovn-nbctl lr-add lr0]) + + dnl Add policies with allow and drop actions + AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.1.0/24" drop]) +-AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow]) ++AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow pkt_mark=100,foo=bar]) + AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.1.0/24" allow]) + AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.2.0/24" drop]) + AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip6.src == 2002::/64" drop]) + ++dnl Incomplete option set. ++AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" reroute 192.168.0.10 foo], [1], [], ++ [ovn-nbctl: No value specified for the option : foo ++]) ++ ++AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" allow bar=], [1], [], ++ [ovn-nbctl: No value specified for the option : bar ++]) ++ + dnl Add duplicated policy + AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.1.0/24" drop], [1], [], + [ovn-nbctl: Same routing policy already existed on the logical router lr0. 
+@@ -1612,14 +1621,14 @@ Routing Policies + 101 ip4.src == 2.1.1.0/24 allow + 101 ip4.src == 2.1.2.0/24 drop + 101 ip6.src == 2002::/64 drop +- 100 ip4.src == 1.1.2.0/24 allow ++ 100 ip4.src == 1.1.2.0/24 allow pkt_mark=100,foo=bar + ]) + + dnl Delete all policies for given priority + AT_CHECK([ovn-nbctl lr-policy-del lr0 101]) + AT_CHECK([ovn-nbctl lr-policy-list lr0], [0], [dnl + Routing Policies +- 100 ip4.src == 1.1.2.0/24 allow ++ 100 ip4.src == 1.1.2.0/24 allow pkt_mark=100,foo=bar + ]) + + +diff --git a/tests/ovn.at b/tests/ovn.at +index 8ce45823f..b84cf75fd 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -20286,20 +20286,16 @@ static_routes @lrt + ovn-nbctl --wait=hv sync + + # Add logical router policy and set pkt_mark on it. +-ovn-nbctl lr-policy-add lr0 2000 "ip4.src == 10.0.0.3" allow ++ovn-nbctl lr-policy-add lr0 2000 "ip4.src == 10.0.0.3" allow pkt_mark=100 + ovn-nbctl lr-policy-add lr0 1000 "ip4.src == 10.0.0.4" allow +-ovn-nbctl lr-policy-add lr0 900 "ip4.src == 10.0.0.5" reroute 172.168.0.200 ++ovn-nbctl lr-policy-add lr0 900 "ip4.src == 10.0.0.5" reroute 172.168.0.200 pkt_mark=3 + ovn-nbctl lr-policy-add lr0 2001 "ip6.dst == bef0::5" reroute bef0::6 + ovn-nbctl lr-policy-add lr0 1001 "ip6" allow + +- + pol1=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=2000) +-pol3=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=900) + pol4=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=2001) + pol5=$(ovn-nbctl --bare --columns _uuid find logical_router_policy priority=1001) + +-ovn-nbctl set logical_router_policy $pol1 options:pkt_mark=100 +-ovn-nbctl set logical_router_policy $pol3 options:pkt_mark=3 + ovn-nbctl set logical_router_policy $pol4 options:pkt_mark=4 + ovn-nbctl set logical_router_policy $pol5 options:pkt_mark=5 + ovn-nbctl --wait=hv sync +diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml +index d265c7fcc..de86b70e6 100644 +--- a/utilities/ovn-nbctl.8.xml ++++ 
b/utilities/ovn-nbctl.8.xml +@@ -721,7 +721,8 @@ + +
            +
            lr-policy-add router priority +- match action [nexthop]
            ++ match action [nexthop] ++ [options key=value]] +
            +

            + Add Policy to router which provides a way to configure +@@ -732,6 +733,8 @@ + only when action is reroute. A policy is + uniquely identified by priority and match. + Multiple policies can have the same priority. ++ options sets the router policy options as key-value pair. ++ The supported option is : pkt_mark. +

            + +

            +@@ -743,6 +746,12 @@ + lr-policy-add lr1 100 ip4.src == 192.168.100.0/24 drop. +

            + ++

            ++ ++ lr-policy-add lr1 100 ip4.src == 192.168.100.0/24 allow ++ pkt_mark=100 ++ . ++

            +
            + +
            lr-policy-del router [{priority | uuid} +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 159a44960..7578b9928 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -694,7 +694,8 @@ Route commands:\n\ + lr-route-list ROUTER print routes for ROUTER\n\ + \n\ + Policy commands:\n\ +- lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP]\n\ ++ lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \ ++[OPTIONS KEY=VALUE ...] \n\ + add a policy to router\n\ + lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\ + remove policies from ROUTER\n\ +@@ -3609,16 +3610,19 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + const char *action = ctx->argv[4]; + char *next_hop = NULL; + ++ bool reroute = false; + /* Validate action. */ + if (strcmp(action, "allow") && strcmp(action, "drop") + && strcmp(action, "reroute")) { + ctl_error(ctx, "%s: action must be one of \"allow\", \"drop\", " + "and \"reroute\"", action); ++ return; + } + if (!strcmp(action, "reroute")) { + if (ctx->argc < 6) { + ctl_error(ctx, "Nexthop is required when action is reroute."); + } ++ reroute = true; + } + + /* Check if same routing policy already exists. +@@ -3629,12 +3633,14 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + !strcmp(policy->match, ctx->argv[3])) { + ctl_error(ctx, "Same routing policy already existed on the " + "logical router %s.", ctx->argv[1]); ++ return; + } + } +- if (ctx->argc == 6) { ++ if (reroute) { + next_hop = normalize_prefix_str(ctx->argv[5]); + if (!next_hop) { + ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); ++ return; + } + } + +@@ -3643,9 +3649,28 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + nbrec_logical_router_policy_set_priority(policy, priority); + nbrec_logical_router_policy_set_match(policy, ctx->argv[3]); + nbrec_logical_router_policy_set_action(policy, action); +- if (ctx->argc == 6) { ++ if (reroute) { + nbrec_logical_router_policy_set_nexthop(policy, next_hop); + } ++ ++ /* Parse the options. 
*/ ++ struct smap options = SMAP_INITIALIZER(&options); ++ for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) { ++ char *key, *value; ++ value = xstrdup(ctx->argv[i]); ++ key = strsep(&value, "="); ++ if (value && value[0]) { ++ smap_add(&options, key, value); ++ } else { ++ ctl_error(ctx, "No value specified for the option : %s", key); ++ free(key); ++ return; ++ } ++ free(key); ++ } ++ nbrec_logical_router_policy_set_options(policy, &options); ++ smap_destroy(&options); ++ + nbrec_logical_router_verify_policies(lr); + struct nbrec_logical_router_policy **new_policies + = xmalloc(sizeof *new_policies * (lr->n_policies + 1)); +@@ -3773,6 +3798,16 @@ print_routing_policy(const struct nbrec_logical_router_policy *policy, + ds_put_format(s, "%10"PRId64" %50s %15s", policy->priority, + policy->match, policy->action); + } ++ ++ if (!smap_is_empty(&policy->options)) { ++ ds_put_format(s, "%15s", ""); ++ struct smap_node *node; ++ SMAP_FOR_EACH (node, &policy->options) { ++ ds_put_format(s, "%s=%s,", node->key, node->value); ++ } ++ ds_chomp(s, ','); ++ } ++ + ds_put_char(s, '\n'); + } + +@@ -3788,7 +3823,7 @@ nbctl_lr_policy_list(struct ctl_context *ctx) + return; + } + policies = xmalloc(sizeof *policies * lr->n_policies); +- for (int i = 0; i < lr->n_policies; i++) { ++ for (int i = 0; i < lr->n_policies; i++) { + const struct nbrec_logical_router_policy *policy + = lr->policies[i]; + policies[n_policies].priority = policy->priority; +@@ -6362,8 +6397,9 @@ static const struct ctl_command_syntax nbctl_commands[] = { + "", RO }, + + /* Policy commands */ +- { "lr-policy-add", 4, 5, "ROUTER PRIORITY MATCH ACTION [NEXTHOP]", NULL, +- nbctl_lr_policy_add, NULL, "", RW }, ++ { "lr-policy-add", 4, INT_MAX, ++ "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]", ++ NULL, nbctl_lr_policy_add, NULL, "", RW }, + { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]", NULL, + nbctl_lr_policy_del, NULL, "", RW }, + { "lr-policy-list", 1, 1, "ROUTER", NULL, 
nbctl_lr_policy_list, NULL, +-- +2.26.2 + diff --git a/SOURCES/0004-tests-Fix-occasional-failures-for-test-85.patch b/SOURCES/0004-tests-Fix-occasional-failures-for-test-85.patch deleted file mode 100644 index eaaf3c0..0000000 --- a/SOURCES/0004-tests-Fix-occasional-failures-for-test-85.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 9735eeff2b736145fe7ab444eb77686a595a9c8e Mon Sep 17 00:00:00 2001 -From: Numan Siddique -Date: Fri, 17 Apr 2020 20:09:24 +0530 -Subject: [PATCH 4/4] tests: Fix occasional failures for test 85. - -The test case "85: ovn -- send gratuitous ARP for NAT rules on HA distributed router" -fails occaionally. On faster systems, chances of failure are higher. - -This patch fixes this. - -Tested-by: Flavio Fernandes -Signed-off-by: Numan Siddique ---- - tests/ovn.at | 21 +++++++++++++++++++-- - 1 file changed, 19 insertions(+), 2 deletions(-) - -diff --git a/tests/ovn.at b/tests/ovn.at -index ae3b44cb3..91d0a8fec 100644 ---- a/tests/ovn.at -+++ b/tests/ovn.at -@@ -11062,6 +11062,12 @@ only_broadcast_from_lrp1() { - garp="fffffffffffff0000000000108060001080006040001f00000000001c0a80064000000000000c0a80064" - echo $garp > expout - -+OVS_WAIT_UNTIL( -+ [$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap > rcv_text -+ exp_rcvd=$(cat rcv_text | grep $garp | wc -l) -+ echo "expected received = $exp_rcvd" -+ test $exp_rcvd -ge 1]) -+ - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap | trim_zeros | only_broadcast_from_lrp1 | uniq > hv1_snoop_tx - echo "packets on hv1-snoopvif:" - cat hv1_snoop_tx -@@ -11090,12 +11096,17 @@ as hv1 reset_pcap_file snoopvif hv1/snoopvif - as hv2 reset_pcap_file br-phys_n1 hv2/br-phys_n1 - as hv3 reset_pcap_file br-phys_n1 hv3/br-phys_n1 - --# Wait for packets to be received. --OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 100]) - trim_zeros() { - sed 's/\(00\)\{1,\}$//' - } - -+# Wait for packets to be received. 
-+OVS_WAIT_UNTIL( -+ [$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap > rcv_text -+ exp_rcvd=$(cat rcv_text | grep $garp | wc -l) -+ echo "expected received = $exp_rcvd" -+ test $exp_rcvd -ge 1]) -+ - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap | trim_zeros | only_broadcast_from_lrp1 | uniq > hv1_snoopvif_tx - AT_CHECK([sort hv1_snoopvif_tx], [0], [expout]) - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv3/br-phys_n1-tx.pcap | trim_zeros | only_broadcast_from_lrp1 | uniq > hv3_br_phys_tx -@@ -11141,6 +11152,12 @@ trim_zeros() { - garp="fffffffffffff00000000001810007de08060001080006040001f00000000001c0a80064000000000000c0a80064" - echo $garp > expout - -+OVS_WAIT_UNTIL( -+ [$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap > rcv_text -+ exp_rcvd=$(cat rcv_text | grep $garp | wc -l) -+ echo "expected received = $exp_rcvd" -+ test $exp_rcvd -ge 1]) -+ - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap | trim_zeros | only_broadcast_from_lrp1 | uniq > hv1_snoopvif_tx - AT_CHECK([sort hv1_snoopvif_tx], [0], [expout]) - $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv3/br-phys_n1-tx.pcap | trim_zeros | only_broadcast_from_lrp1 | uniq > hv3_br_phys_tx --- -2.26.2 - diff --git a/SOURCES/0005-pinctrl-Support-DHCPRELEASE-and-DHCPINFORM-in-native.patch b/SOURCES/0005-pinctrl-Support-DHCPRELEASE-and-DHCPINFORM-in-native.patch new file mode 100644 index 0000000..b4d6b18 --- /dev/null +++ b/SOURCES/0005-pinctrl-Support-DHCPRELEASE-and-DHCPINFORM-in-native.patch @@ -0,0 +1,438 @@ +From d41a77411d154b9bdfab1a7c78deb63c7c5b990b Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Fri, 29 May 2020 23:29:37 +0530 +Subject: [PATCH 05/22] pinctrl: Support DHCPRELEASE and DHCPINFORM in native + OVN dhcp responder. + +Right now we ignore these dhcp packets. This patch adds the support +as per RFC 2131. 
+ +Acked-by: Lorenzo Bianconi +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit e008a4d46020a778b8f1f85b9dfd7c9e9b6fde21) + +Change-Id: I1363f99d241f661957147a7c124ac5b1c093ce00 +--- + NEWS | 2 + + controller/pinctrl.c | 125 ++++++++++++++++++++++++++++++++++--------- + lib/ovn-l7.h | 12 +++++ + tests/ovn.at | 125 ++++++++++++++++++++++++++++++++++++++++--- + 4 files changed, 233 insertions(+), 31 deletions(-) + +diff --git a/NEWS b/NEWS +index 8abdc95b9..2ed3d480a 100644 +--- a/NEWS ++++ b/NEWS +@@ -9,6 +9,8 @@ OVN v20.06.1 - 08 Jul 2020 + -------------------------- + - Added packet marking support for traffic routed with + a routing policy. ++ - Added DHCPINFORM and DHCPRELEASE support in native ++ OVN DHCPv4 responder. + + OVN v20.06.0 - 08 Jun 2020 + -------------------------- +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index dab1782f6..9231efbe3 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -1682,11 +1682,13 @@ static void + pinctrl_handle_put_dhcp_opts( + struct rconn *swconn, + struct dp_packet *pkt_in, struct ofputil_packet_in *pin, +- struct ofpbuf *userdata, struct ofpbuf *continuation) ++ struct flow *in_flow, struct ofpbuf *userdata, ++ struct ofpbuf *continuation) + { + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + struct dp_packet *pkt_out_ptr = NULL; ++ struct ofpbuf *dhcp_inform_reply_buf = NULL; + uint32_t success = 0; + + /* Parse result field. 
*/ +@@ -1810,22 +1812,15 @@ pinctrl_handle_put_dhcp_opts( + VLOG_WARN_RL(&rl, "Missing DHCP message type"); + goto exit; + } +- if (*in_dhcp_msg_type != DHCP_MSG_DISCOVER && +- *in_dhcp_msg_type != DHCP_MSG_REQUEST) { +- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); +- VLOG_WARN_RL(&rl, "Invalid DHCP message type: %d", *in_dhcp_msg_type); +- goto exit; +- } + +- uint8_t msg_type; +- if (*in_dhcp_msg_type == DHCP_MSG_DISCOVER) { ++ struct ofpbuf *reply_dhcp_opts_ptr = userdata; ++ uint8_t msg_type = 0; ++ ++ switch (*in_dhcp_msg_type) { ++ case DHCP_MSG_DISCOVER: + msg_type = DHCP_MSG_OFFER; +- } else { +- /* This is a DHCPREQUEST. If the client has requested an IP that +- * does not match the offered IP address, reply with a NAK. The +- * requested IP address may be supplied either via Requested IP Address +- * (opt 50) or via ciaddr, depending on the client's state. +- */ ++ break; ++ case DHCP_MSG_REQUEST: { + msg_type = DHCP_MSG_ACK; + if (request_ip != *offer_ip) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); +@@ -1834,12 +1829,81 @@ pinctrl_handle_put_dhcp_opts( + IP_ARGS(*offer_ip)); + msg_type = DHCP_MSG_NAK; + } ++ break; ++ } ++ case OVN_DHCP_MSG_RELEASE: { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 40); ++ const struct eth_header *l2 = dp_packet_eth(pkt_in); ++ VLOG_INFO_RL(&rl, "DHCPRELEASE "ETH_ADDR_FMT " "IP_FMT"", ++ ETH_ADDR_ARGS(l2->eth_src), ++ IP_ARGS(in_dhcp_data->ciaddr)); ++ break; ++ } ++ case OVN_DHCP_MSG_INFORM: { ++ /* RFC 2131 section 3.4. ++ * Remove all the offer ip related dhcp options and ++ * all the time related dhcp options. ++ * Loop through the dhcp option defined in the userdata buffer ++ * and copy all the options into dhcp_inform_reply_buf skipping ++ * the not required ones. ++ * */ ++ msg_type = DHCP_MSG_ACK; ++ in_dhcp_ptr = userdata->data; ++ end = (const char *)userdata->data + userdata->size; ++ ++ /* The buf size cannot be greater > userdata->size. 
*/ ++ dhcp_inform_reply_buf = ofpbuf_new(userdata->size); ++ ++ reply_dhcp_opts_ptr = dhcp_inform_reply_buf; ++ while (in_dhcp_ptr < end) { ++ const struct dhcp_opt_header *in_dhcp_opt = ++ (const struct dhcp_opt_header *)in_dhcp_ptr; ++ ++ switch (in_dhcp_opt->code) { ++ case OVN_DHCP_OPT_CODE_NETMASK: ++ case OVN_DHCP_OPT_CODE_LEASE_TIME: ++ case OVN_DHCP_OPT_CODE_T1: ++ case OVN_DHCP_OPT_CODE_T2: ++ break; ++ default: ++ /* Copy the dhcp option to reply_dhcp_opts_ptr. */ ++ ofpbuf_put(reply_dhcp_opts_ptr, in_dhcp_opt, ++ in_dhcp_opt->len + sizeof *in_dhcp_opt); ++ break; ++ } ++ ++ in_dhcp_ptr += sizeof *in_dhcp_opt; ++ if (in_dhcp_ptr > end) { ++ break; ++ } ++ in_dhcp_ptr += in_dhcp_opt->len; ++ if (in_dhcp_ptr > end) { ++ break; ++ } ++ } ++ ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 40); ++ VLOG_INFO_RL(&rl, "DHCPINFORM from "ETH_ADDR_FMT " "IP_FMT"", ++ ETH_ADDR_ARGS(in_flow->dl_src), ++ IP_ARGS(in_flow->nw_src)); ++ ++ break; ++ } ++ default: { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ++ VLOG_WARN_RL(&rl, "Invalid DHCP message type: %d", *in_dhcp_msg_type); ++ goto exit; ++ } ++ } ++ ++ if (!msg_type) { ++ goto exit; + } + + /* Frame the DHCP reply packet +- * Total DHCP options length will be options stored in the userdata + +- * 16 bytes. Note that the DHCP options stored in userdata are not included +- * in DHCPNAK messages. ++ * Total DHCP options length will be options stored in the ++ * reply_dhcp_opts_ptr + 16 bytes. Note that the DHCP options stored in ++ * reply_dhcp_opts_ptr are not included in DHCPNAK messages. 
+ * + * -------------------------------------------------------------- + *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options | +@@ -1849,7 +1913,7 @@ pinctrl_handle_put_dhcp_opts( + */ + uint16_t new_l4_size = UDP_HEADER_LEN + DHCP_HEADER_LEN + 16; + if (msg_type != DHCP_MSG_NAK) { +- new_l4_size += userdata->size; ++ new_l4_size += reply_dhcp_opts_ptr->size; + } + size_t new_packet_size = pkt_in->l4_ofs + new_l4_size; + +@@ -1874,12 +1938,18 @@ pinctrl_handle_put_dhcp_opts( + struct dhcp_header *dhcp_data = dp_packet_put( + &pkt_out, dp_packet_pull(pkt_in, DHCP_HEADER_LEN), DHCP_HEADER_LEN); + dhcp_data->op = DHCP_OP_REPLY; +- dhcp_data->yiaddr = (msg_type == DHCP_MSG_NAK) ? 0 : *offer_ip; ++ ++ if (*in_dhcp_msg_type != OVN_DHCP_MSG_INFORM) { ++ dhcp_data->yiaddr = (msg_type == DHCP_MSG_NAK) ? 0 : *offer_ip; ++ } else { ++ dhcp_data->yiaddr = 0; ++ } ++ + dp_packet_put(&pkt_out, &magic_cookie, sizeof(ovs_be32)); + + uint16_t out_dhcp_opts_size = 12; + if (msg_type != DHCP_MSG_NAK) { +- out_dhcp_opts_size += userdata->size; ++ out_dhcp_opts_size += reply_dhcp_opts_ptr->size; + } + uint8_t *out_dhcp_opts = dp_packet_put_zeros(&pkt_out, + out_dhcp_opts_size); +@@ -1890,8 +1960,9 @@ pinctrl_handle_put_dhcp_opts( + out_dhcp_opts += 3; + + if (msg_type != DHCP_MSG_NAK) { +- memcpy(out_dhcp_opts, userdata->data, userdata->size); +- out_dhcp_opts += userdata->size; ++ memcpy(out_dhcp_opts, reply_dhcp_opts_ptr->data, ++ reply_dhcp_opts_ptr->size); ++ out_dhcp_opts += reply_dhcp_opts_ptr->size; + } + + /* Padding */ +@@ -1939,6 +2010,10 @@ exit: + if (pkt_out_ptr) { + dp_packet_uninit(pkt_out_ptr); + } ++ ++ if (dhcp_inform_reply_buf) { ++ ofpbuf_delete(dhcp_inform_reply_buf); ++ } + } + + static bool +@@ -2644,8 +2719,8 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + break; + + case ACTION_OPCODE_PUT_DHCP_OPTS: +- pinctrl_handle_put_dhcp_opts(swconn, &packet, &pin, &userdata, +- &continuation); ++ pinctrl_handle_put_dhcp_opts(swconn, 
&packet, &pin, &headers, ++ &userdata, &continuation); + break; + + case ACTION_OPCODE_ND_NA: +diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h +index cea97b9ce..9acfbe075 100644 +--- a/lib/ovn-l7.h ++++ b/lib/ovn-l7.h +@@ -37,6 +37,14 @@ struct gen_opts_map { + #define DOMAIN_NAME_MAX_LEN 255 + #define DHCP_BROADCAST_FLAG 0x8000 + ++/* These are not defined in ovs/lib/dhcp.h and hence defined here with ++ * OVN_DHCP_OPT_CODE_. ++ */ ++#define OVN_DHCP_OPT_CODE_NETMASK 1 ++#define OVN_DHCP_OPT_CODE_LEASE_TIME 51 ++#define OVN_DHCP_OPT_CODE_T1 58 ++#define OVN_DHCP_OPT_CODE_T2 59 ++ + #define DHCP_OPTION(NAME, CODE, TYPE) \ + {.name = NAME, .code = CODE, .type = TYPE} + +@@ -171,6 +179,10 @@ struct dhcp_opt6_header { + ovs_be16 size; + }; + ++/* These are not defined in ovs/lib/dhcp.h, hence defining here. */ ++#define OVN_DHCP_MSG_RELEASE 7 ++#define OVN_DHCP_MSG_INFORM 8 ++ + /* Supported DHCPv6 Message Types */ + #define DHCPV6_MSG_TYPE_SOLICIT 1 + #define DHCPV6_MSG_TYPE_ADVT 2 +diff --git a/tests/ovn.at b/tests/ovn.at +index b84cf75fd..6866a58da 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -5330,6 +5330,12 @@ test_dhcp() { + done + if test $offer_ip != 0; then + local srv_mac=$1 srv_ip=$2 dhcp_reply_type=$3 expected_dhcp_opts=$4 ++ local offered_ip=$offer_ip ++ if [[ "$dhcp_type" == "08" ]]; then ++ # DHCP ACK for DHCP INFORM should not have any offer ip. 
++ offered_ip=00000000 ++ fi ++ + # total IP length will be the IP length of the request packet + # (which is 272 in our case) + 8 (padding bytes) + (expected_dhcp_opts / 2) + ip_len=`expr 280 + ${#expected_dhcp_opts} / 2` +@@ -5345,7 +5351,7 @@ test_dhcp() { + if test $dhcp_reply_type = 06; then + reply=${reply}00000000 + else +- reply=${reply}${offer_ip} ++ reply=${reply}${offered_ip} + fi + # next server ip address, relay agent ip address, client mac address + reply=${reply}0000000000000000${src_mac} +@@ -5485,7 +5491,7 @@ rm -f 2.expected + ciaddr=`ip_to_hex 0 0 0 0` + offer_ip=0 + request_ip=0 +-test_dhcp 2 f00000000002 08 0 $ciaddr $offer_ip $request_ip 0 1 1 ++test_dhcp 2 f00000000002 09 0 $ciaddr $offer_ip $request_ip 0 1 1 + + # NXT_RESUMEs should be 4. + OVS_WAIT_UNTIL([test 4 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) +@@ -5660,6 +5666,113 @@ reset_pcap_file hv1-vif2 hv1/vif2 + rm -f 1.expected + rm -f 2.expected + ++# Send DHCPRELEASE. ++offer_ip=0 ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=`ip_to_hex 10 0 0 6` ++request_ip=0 ++expected_dhcp_opts=0 ++test_dhcp 2 f00000000002 07 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 ++ ++# NXT_RESUMEs should be 10. ++OVS_WAIT_UNTIL([test 10 = $(cat ofctl_monitor*.log | grep -c NXT_RESUME)]) ++ ++# There is no reply for this. Check for the INFO log in ovn-controller.log ++AT_CHECK([test 1 = $(cat hv1/ovn-controller.log | \ ++grep "DHCPRELEASE f0:00:00:00:00:02 10.0.0.6" -c)]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++AT_CHECK([cat 2.packets], [0], []) ++ ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ ++# Send DHCPINFORM ++offer_ip=`ip_to_hex 10 0 0 6` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=$offer_ip ++request_ip=0 ++src_ip=$offer_ip ++dst_ip=$server_ip ++# In the expected_dhcp_opts we should not see 330400000e10 which is ++# dhcp lease time option and 0104ffffff00 which is subnet mask option. 
++expected_dhcp_opts=03040a00000136040a000001 ++test_dhcp 2 f00000000002 08 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts ++ ++# NXT_RESUMEs should be 11. ++OVS_WAIT_UNTIL([test 11 = $(cat ofctl_monitor*.log | grep -c NXT_RESUME)]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++cat 2.expected | cut -c -48 > expout ++AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) ++# Skipping the IPv4 checksum. ++cat 2.expected | cut -c 53- > expout ++AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) ++ ++# Now add the dhcp option T1 to the dhcp options. ++ovn-nbctl set dhcp_options ${d1} options:T1=4000 ++ ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ ++# Send DHCPREQUEST to make sure that T1 is in the reply dhcp options. ++offer_ip=`ip_to_hex 10 0 0 6` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=$offer_ip ++request_ip=0 ++src_ip=$offer_ip ++dst_ip=$server_ip ++# In the expected_dhcp_opts we should not see 330400000e10 which is ++# dhcp lease time option. ++expected_dhcp_opts=3a0400000fa0330400000e100104ffffff0003040a00000136040a000001 ++test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts ++ ++# NXT_RESUMEs should be 12. ++OVS_WAIT_UNTIL([test 12 = $(cat ofctl_monitor*.log | grep -c NXT_RESUME)]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++cat 2.expected | cut -c -48 > expout ++AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) ++# Skipping the IPv4 checksum. ++cat 2.expected | cut -c 53- > expout ++AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) ++ ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ ++# Now send DHCPINFORM again. 
++offer_ip=`ip_to_hex 10 0 0 6` ++server_ip=`ip_to_hex 10 0 0 1` ++ciaddr=00000000 ++request_ip=0 ++src_ip=$offer_ip ++dst_ip=$server_ip ++# In the expected_dhcp_opts we should not see 330400000e10 which is ++# dhcp lease time option and 0104ffffff00 which is subnet mask option. ++expected_dhcp_opts=03040a00000136040a000001 ++test_dhcp 2 f00000000002 08 0 $ciaddr $offer_ip $request_ip 1 $src_ip $dst_ip ff1000000001 $server_ip 05 $expected_dhcp_opts ++ ++# NXT_RESUMEs should be 13. ++OVS_WAIT_UNTIL([test 13 = $(cat ofctl_monitor*.log | grep -c NXT_RESUME)]) ++ ++$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets ++cat 2.expected | cut -c -48 > expout ++AT_CHECK([cat 2.packets | cut -c -48], [0], [expout]) ++# Skipping the IPv4 checksum. ++cat 2.expected | cut -c 53- > expout ++AT_CHECK([cat 2.packets | cut -c 53-], [0], [expout]) ++ ++reset_pcap_file hv1-vif1 hv1/vif1 ++reset_pcap_file hv1-vif2 hv1/vif2 ++rm -f 1.expected ++rm -f 2.expected ++ + # Set tftp server option (IPv4 address) for ls1 + echo "------ Set tftp server (IPv4 address) --------" + ovn-nbctl dhcp-options-set-options $d1 server_id=10.0.0.1 \ +@@ -5676,8 +5789,8 @@ request_ip=$offer_ip + expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a00000142040a0a0a0a + test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts + +-# NXT_RESUMEs should be 10. +-OVS_WAIT_UNTIL([test 10 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) ++# NXT_RESUMEs should be 14. 
++OVS_WAIT_UNTIL([test 14 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) + + $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets + cat 2.expected | cut -c -48 > expout +@@ -5707,8 +5820,8 @@ request_ip=$offer_ip + expected_dhcp_opts=330400000e100104ffffff0003040a00000136040a0000014210746573745f746674705f736572766572 + test_dhcp 2 f00000000002 03 0 $ciaddr $offer_ip $request_ip 0 ff1000000001 $server_ip 05 $expected_dhcp_opts + +-# NXT_RESUMEs should be 11. +-OVS_WAIT_UNTIL([test 11 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) ++# NXT_RESUMEs should be 15. ++OVS_WAIT_UNTIL([test 15 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) + + $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > 2.packets + cat 2.expected | cut -c -48 > expout +-- +2.26.2 + diff --git a/SOURCES/0006-Introduce-icmp6_error-action.patch b/SOURCES/0006-Introduce-icmp6_error-action.patch new file mode 100644 index 0000000..e03f1d6 --- /dev/null +++ b/SOURCES/0006-Introduce-icmp6_error-action.patch @@ -0,0 +1,178 @@ +From 13e1aade24b29ce4a6425a355f482b149848928a Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Tue, 7 Jul 2020 17:18:26 +0200 +Subject: [PATCH 06/22] Introduce icmp6_error action + +Introduce icmp6_error action in order to generate an ICMPv6 packet in +response to an error in original IPv6 packet + +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/pinctrl.c | 1 + + include/ovn/actions.h | 9 ++++++++- + lib/actions.c | 22 ++++++++++++++++++++++ + ovn-sb.xml | 8 ++++++++ + tests/ovn.at | 10 ++++++++++ + utilities/ovn-trace.c | 5 +++++ + 6 files changed, 54 insertions(+), 1 deletion(-) + +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index 9231efbe3..f8be22db0 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -2772,6 +2772,7 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + break; + + case ACTION_OPCODE_ICMP4_ERROR: ++ case ACTION_OPCODE_ICMP6_ERROR: + 
pinctrl_handle_icmp(swconn, &headers, &packet, &pin.flow_metadata, + &userdata, false); + break; +diff --git a/include/ovn/actions.h b/include/ovn/actions.h +index 2b5a63a74..5abf61a40 100644 +--- a/include/ovn/actions.h ++++ b/include/ovn/actions.h +@@ -94,7 +94,8 @@ struct ovn_extend_table; + OVNACT(BIND_VPORT, ovnact_bind_vport) \ + OVNACT(HANDLE_SVC_CHECK, ovnact_handle_svc_check) \ + OVNACT(FWD_GROUP, ovnact_fwd_group) \ +- OVNACT(DHCP6_REPLY, ovnact_null) ++ OVNACT(DHCP6_REPLY, ovnact_null) \ ++ OVNACT(ICMP6_ERROR, ovnact_nest) + + /* enum ovnact_type, with a member OVNACT_ for each action. */ + enum OVS_PACKED_ENUM ovnact_type { +@@ -601,6 +602,12 @@ enum action_opcode { + * The actions, in OpenFlow 1.3 format, follow the action_header. + */ + ACTION_OPCODE_DHCP6_SERVER, ++ ++ /* "icmp6_error { ...actions... }". ++ * ++ * The actions, in OpenFlow 1.3 format, follow the action_header. ++ */ ++ ACTION_OPCODE_ICMP6_ERROR, + }; + + /* Header. */ +diff --git a/lib/actions.c b/lib/actions.c +index e4de97c23..515dbd2db 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -1408,6 +1408,12 @@ parse_ICMP6(struct action_context *ctx) + parse_nested_action(ctx, OVNACT_ICMP6, "ip6"); + } + ++static void ++parse_ICMP6_ERROR(struct action_context *ctx) ++{ ++ parse_nested_action(ctx, OVNACT_ICMP6_ERROR, "ip6"); ++} ++ + static void + parse_TCP_RESET(struct action_context *ctx) + { +@@ -1471,6 +1477,12 @@ format_ICMP6(const struct ovnact_nest *nest, struct ds *s) + format_nested_action(nest, "icmp6", s); + } + ++static void ++format_ICMP6_ERROR(const struct ovnact_nest *nest, struct ds *s) ++{ ++ format_nested_action(nest, "icmp6_error", s); ++} ++ + static void + format_IGMP(const struct ovnact_null *a OVS_UNUSED, struct ds *s) + { +@@ -1582,6 +1594,14 @@ encode_ICMP6(const struct ovnact_nest *on, + encode_nested_actions(on, ep, ACTION_OPCODE_ICMP, ofpacts); + } + ++static void ++encode_ICMP6_ERROR(const struct ovnact_nest *on, ++ const struct ovnact_encode_params *ep, ++ 
struct ofpbuf *ofpacts) ++{ ++ encode_nested_actions(on, ep, ACTION_OPCODE_ICMP6_ERROR, ofpacts); ++} ++ + static void + encode_IGMP(const struct ovnact_null *a OVS_UNUSED, + const struct ovnact_encode_params *ep OVS_UNUSED, +@@ -3558,6 +3578,8 @@ parse_action(struct action_context *ctx) + parse_ICMP4_ERROR(ctx); + } else if (lexer_match_id(ctx->lexer, "icmp6")) { + parse_ICMP6(ctx); ++ } else if (lexer_match_id(ctx->lexer, "icmp6_error")) { ++ parse_ICMP6_ERROR(ctx); + } else if (lexer_match_id(ctx->lexer, "igmp")) { + ovnact_put_IGMP(ctx->ovnacts); + } else if (lexer_match_id(ctx->lexer, "tcp_reset")) { +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 709cb4c48..6d6775a45 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -2090,6 +2090,9 @@ + + +
            icmp6 { action; ... };
            ++
            ++ icmp6_error { action; ... }; ++
            +
            +

            + Temporarily replaces the IPv6 packet being processed by an ICMPv6 +@@ -2112,6 +2115,11 @@ +

          • icmp6.code = 1 (administratively prohibited)
          • +
          + ++

          ++ The icmp6_error action is expected to be used to ++ generate an ICMPv6 packet in response to an error in the original ++ IPv6 packet. ++

          +

          Prerequisite: ip6

          + + +diff --git a/tests/ovn.at b/tests/ovn.at +index 6866a58da..cfcfa0915 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1547,6 +1547,16 @@ icmp6 { }; + encodes as controller(userdata=00.00.00.0a.00.00.00.00) + has prereqs ip6 + ++# icmp6_error ++icmp6_error { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output; ++ encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) ++ has prereqs ip6 ++ ++icmp6_error { }; ++ formats as icmp6_error { drop; }; ++ encodes as controller(userdata=00.00.00.14.00.00.00.00) ++ has prereqs ip6 ++ + # tcp_reset + tcp_reset { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output; + encodes as controller(userdata=00.00.00.0b.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) +diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c +index 647cf3075..c34517aaa 100644 +--- a/utilities/ovn-trace.c ++++ b/utilities/ovn-trace.c +@@ -2319,6 +2319,11 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, + super); + break; + ++ case OVNACT_ICMP6_ERROR: ++ execute_icmp6(ovnact_get_ICMP6_ERROR(a), dp, uflow, table_id, ++ pipeline, super); ++ break; ++ + case OVNACT_IGMP: + /* Nothing to do for tracing. 
*/ + break; +-- +2.26.2 + diff --git a/SOURCES/0007-Introduce-icmp6.frag_mtu-action.patch b/SOURCES/0007-Introduce-icmp6.frag_mtu-action.patch new file mode 100644 index 0000000..7f39cba --- /dev/null +++ b/SOURCES/0007-Introduce-icmp6.frag_mtu-action.patch @@ -0,0 +1,287 @@ +From c659c6ae95e5ffa94e795a0f104c27c3f5523eae Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Tue, 7 Jul 2020 17:18:27 +0200 +Subject: [PATCH 07/22] Introduce icmp6.frag_mtu action + +Similar to what have been already done for IPv4, introduce +icmp6.frag_mtu action in order to set correct mtu in ICMPv6 "packet too +big" error message + +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + controller/pinctrl.c | 77 ++++++++++++++++++++++-------------- + include/ovn/actions.h | 4 ++ + include/ovn/logical-fields.h | 7 ++++ + lib/actions.c | 13 +++++- + lib/logical-fields.c | 5 +++ + ovn-sb.xml | 7 ++-- + tests/ovn.at | 8 ++++ + utilities/ovn-trace.c | 7 +++- + 8 files changed, 93 insertions(+), 35 deletions(-) + +diff --git a/controller/pinctrl.c b/controller/pinctrl.c +index f8be22db0..f72ab70e1 100644 +--- a/controller/pinctrl.c ++++ b/controller/pinctrl.c +@@ -228,12 +228,12 @@ static void pinctrl_handle_nd_ns(struct rconn *swconn, + struct dp_packet *pkt_in, + const struct match *md, + struct ofpbuf *userdata); +-static void pinctrl_handle_put_icmp4_frag_mtu(struct rconn *swconn, +- const struct flow *in_flow, +- struct dp_packet *pkt_in, +- struct ofputil_packet_in *pin, +- struct ofpbuf *userdata, +- struct ofpbuf *continuation); ++static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn, ++ const struct flow *in_flow, ++ struct dp_packet *pkt_in, ++ struct ofputil_packet_in *pin, ++ struct ofpbuf *userdata, ++ struct ofpbuf *continuation); + static void + pinctrl_handle_event(struct ofpbuf *userdata) + OVS_REQUIRES(pinctrl_mutex); +@@ -2783,8 +2783,9 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) + break; + + case 
ACTION_OPCODE_PUT_ICMP4_FRAG_MTU: +- pinctrl_handle_put_icmp4_frag_mtu(swconn, &headers, &packet, +- &pin, &userdata, &continuation); ++ case ACTION_OPCODE_PUT_ICMP6_FRAG_MTU: ++ pinctrl_handle_put_icmp_frag_mtu(swconn, &headers, &packet, &pin, ++ &userdata, &continuation); + break; + + case ACTION_OPCODE_EVENT: +@@ -5475,26 +5476,22 @@ exit: + + /* Called with in the pinctrl_handler thread context. */ + static void +-pinctrl_handle_put_icmp4_frag_mtu(struct rconn *swconn, +- const struct flow *in_flow, +- struct dp_packet *pkt_in, +- struct ofputil_packet_in *pin, +- struct ofpbuf *userdata, +- struct ofpbuf *continuation) ++pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn, ++ const struct flow *in_flow, ++ struct dp_packet *pkt_in, ++ struct ofputil_packet_in *pin, ++ struct ofpbuf *userdata, ++ struct ofpbuf *continuation) + { + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + struct dp_packet *pkt_out = NULL; + +- /* This action only works for ICMPv4 packets. */ +- if (!is_icmpv4(in_flow, NULL)) { ++ /* This action only works for ICMPv4/v6 packets. 
*/ ++ if (!is_icmpv4(in_flow, NULL) && !is_icmpv6(in_flow, NULL)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); +- VLOG_WARN_RL(&rl, "put_icmp4_frag_mtu action on non-ICMPv4 packet"); +- goto exit; +- } +- +- ovs_be16 *mtu = ofpbuf_try_pull(userdata, sizeof *mtu); +- if (!mtu) { ++ VLOG_WARN_RL(&rl, ++ "put_icmp(4/6)_frag_mtu action on non-ICMPv4/v6 packet"); + goto exit; + } + +@@ -5504,13 +5501,35 @@ pinctrl_handle_put_icmp4_frag_mtu(struct rconn *swconn, + pkt_out->l3_ofs = pkt_in->l3_ofs; + pkt_out->l4_ofs = pkt_in->l4_ofs; + +- struct ip_header *nh = dp_packet_l3(pkt_out); +- struct icmp_header *ih = dp_packet_l4(pkt_out); +- ovs_be16 old_frag_mtu = ih->icmp_fields.frag.mtu; +- ih->icmp_fields.frag.mtu = *mtu; +- ih->icmp_csum = recalc_csum16(ih->icmp_csum, old_frag_mtu, *mtu); +- nh->ip_csum = 0; +- nh->ip_csum = csum(nh, sizeof *nh); ++ if (is_icmpv4(in_flow, NULL)) { ++ ovs_be16 *mtu = ofpbuf_try_pull(userdata, sizeof *mtu); ++ if (!mtu) { ++ goto exit; ++ } ++ ++ struct ip_header *nh = dp_packet_l3(pkt_out); ++ struct icmp_header *ih = dp_packet_l4(pkt_out); ++ ovs_be16 old_frag_mtu = ih->icmp_fields.frag.mtu; ++ ih->icmp_fields.frag.mtu = *mtu; ++ ih->icmp_csum = recalc_csum16(ih->icmp_csum, old_frag_mtu, *mtu); ++ nh->ip_csum = 0; ++ nh->ip_csum = csum(nh, sizeof *nh); ++ } else { ++ ovs_be32 *mtu = ofpbuf_try_pull(userdata, sizeof *mtu); ++ if (!mtu) { ++ goto exit; ++ } ++ ++ struct icmp6_data_header *ih = dp_packet_l4(pkt_out); ++ put_16aligned_be32(ih->icmp6_data.be32, *mtu); ++ ++ /* compute checksum and set correct mtu */ ++ ih->icmp6_base.icmp6_cksum = 0; ++ uint32_t csum = packet_csum_pseudoheader6(dp_packet_l3(pkt_out)); ++ uint32_t size = (uint8_t *)dp_packet_tail(pkt_out) - (uint8_t *)ih; ++ ih->icmp6_base.icmp6_cksum = csum_finish( ++ csum_continue(csum, ih, size)); ++ } + + pin->packet = dp_packet_data(pkt_out); + pin->packet_len = dp_packet_size(pkt_out); +diff --git a/include/ovn/actions.h b/include/ovn/actions.h 
+index 5abf61a40..34ba0d880 100644 +--- a/include/ovn/actions.h ++++ b/include/ovn/actions.h +@@ -608,6 +608,10 @@ enum action_opcode { + * The actions, in OpenFlow 1.3 format, follow the action_header. + */ + ACTION_OPCODE_ICMP6_ERROR, ++ ++ /* MTU value (to put in the icmp6 header field - frag_mtu) follow the ++ * action header. */ ++ ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, + }; + + /* Header. */ +diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h +index c7bd2dba9..61d17d14f 100644 +--- a/include/ovn/logical-fields.h ++++ b/include/ovn/logical-fields.h +@@ -108,6 +108,13 @@ enum ovn_field_id { + * packet as per the RFC 1191. + */ + OVN_ICMP4_FRAG_MTU, ++ /* ++ * Name: "icmp6.frag_mtu" - ++ * Type: be32 ++ * Description: Sets the first 32 bits of the ICMPv6 body to the MTU of ++ * next-hop link (RFC 4443) ++ */ ++ OVN_ICMP6_FRAG_MTU, + + OVN_FIELD_N_IDS + }; +diff --git a/lib/actions.c b/lib/actions.c +index 515dbd2db..1deeef799 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -3159,6 +3159,7 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s) + const struct ovn_field *f = ovn_field_from_name(load->dst.symbol->name); + switch (f->id) { + case OVN_ICMP4_FRAG_MTU: ++ case OVN_ICMP6_FRAG_MTU: + ds_put_format(s, "%s = %u;", f->name, + ntohs(load->imm.value.be16_int)); + break; +@@ -3178,12 +3179,20 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load, + switch (f->id) { + case OVN_ICMP4_FRAG_MTU: { + size_t oc_offset = encode_start_controller_op( +- ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true, NX_CTLR_NO_METER, +- ofpacts); ++ ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true, ++ NX_CTLR_NO_METER, ofpacts); + ofpbuf_put(ofpacts, &load->imm.value.be16_int, sizeof(ovs_be16)); + encode_finish_controller_op(oc_offset, ofpacts); + break; + } ++ case OVN_ICMP6_FRAG_MTU: { ++ size_t oc_offset = encode_start_controller_op( ++ ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, true, ++ NX_CTLR_NO_METER, ofpacts); ++ ofpbuf_put(ofpacts, &load->imm.value.be32_int, 
sizeof(ovs_be32)); ++ encode_finish_controller_op(oc_offset, ofpacts); ++ break; ++ } + case OVN_FIELD_N_IDS: + default: + OVS_NOT_REACHED(); +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index 8ad56aa53..8639523ea 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -29,6 +29,10 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = { + OVN_ICMP4_FRAG_MTU, + "icmp4.frag_mtu", + 2, 16, ++ }, { ++ OVN_ICMP6_FRAG_MTU, ++ "icmp6.frag_mtu", ++ 4, 32, + }, + }; + +@@ -257,6 +261,7 @@ ovn_init_symtab(struct shash *symtab) + expr_symtab_add_field(symtab, "pkt.mark", MFF_PKT_MARK, NULL, false); + + expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU); ++ expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU); + } + + const char * +diff --git a/ovn-sb.xml b/ovn-sb.xml +index 6d6775a45..fc39b2d03 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -1170,10 +1170,11 @@ +
            +
          • + icmp4.frag_mtu ++ icmp6.frag_mtu +

            +- This field sets the low-order 16 bits of the ICMP4 header field +- that is labelled "unused" in the ICMP specification as defined +- in the RFC 1191 with the value specified in ++ These fields set, respectively, the low-order 16 bits of the ICMP4 header ++ field that is labelled "unused" in the ICMP specification as defined in the RFC 1191, ++ and the first 32 bits of the ICMPv6 body (RFC 4443), with the value specified in + constant.

            + +diff --git a/tests/ovn.at b/tests/ovn.at +index cfcfa0915..80cd62c49 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1557,6 +1557,14 @@ icmp6_error { }; + encodes as controller(userdata=00.00.00.14.00.00.00.00) + has prereqs ip6 + ++# icmp6_error with icmp6.frag_mtu ++icmp6_error { eth.dst = ff:ff:ff:ff:ff:ff; icmp6.frag_mtu = 1500; output; }; output; ++ encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.10.00.00.00.15.00.00.00.00.00.00.05.dc.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) ++ has prereqs ip6 ++ ++icmp6.frag_mtu = 1500; ++ encodes as controller(userdata=00.00.00.15.00.00.00.00.00.00.05.dc,pause) ++ + # tcp_reset + tcp_reset { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output; + encodes as controller(userdata=00.00.00.0b.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64) +diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c +index c34517aaa..50a32b714 100644 +--- a/utilities/ovn-trace.c ++++ b/utilities/ovn-trace.c +@@ -2119,7 +2119,12 @@ execute_ovnfield_load(const struct ovnact_load *load, + ntohs(load->imm.value.be16_int)); + break; + } +- ++ case OVN_ICMP6_FRAG_MTU: { ++ ovntrace_node_append(super, OVNTRACE_NODE_MODIFY, ++ "icmp6.frag_mtu = %u", ++ ntohs(load->imm.value.be16_int)); ++ break; ++ } + case OVN_FIELD_N_IDS: + default: + OVS_NOT_REACHED(); +-- +2.26.2 + diff --git a/SOURCES/0008-northd-introduce-icmp6_error-logical-flows-in-router.patch b/SOURCES/0008-northd-introduce-icmp6_error-logical-flows-in-router.patch new file mode 100644 index 0000000..246780e --- /dev/null +++ b/SOURCES/0008-northd-introduce-icmp6_error-logical-flows-in-router.patch @@ -0,0 +1,263 @@ +From a1a0c7061850d78edb74a7977d0241121575be0e Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Tue, 7 Jul 2020 17:18:28 +0200 
+Subject: [PATCH 08/22] northd: introduce icmp6_error logical flows in router + pipeline + +Introduce icmp6_error logical flows in router pipeline if gateway_mtu +has been added to logical router port option column in order to perform +IPv6 PMTU discovery + +Signed-off-by: Lorenzo Bianconi +Signed-off-by: Numan Siddique +--- + northd/ovn-northd.8.xml | 24 +++++++++--- + northd/ovn-northd.c | 84 ++++++++++++++++++++++++++++------------- + tests/ovn.at | 48 +++++++++++++++++++++-- + 3 files changed, 120 insertions(+), 36 deletions(-) + +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index 67481f458..623768988 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -2922,11 +2922,11 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; + For distributed logical routers with distributed gateway port configured + with options:gateway_mtu to a valid integer value, this + table adds the following priority-50 logical flow for each +- logical router port with the match ip4 && +- inport == LRP && outport == GW_PORT +- && REGBIT_PKT_LARGER, where LRP is the logical +- router port and GW_PORT is the distributed gateway router port +- and applies the following action ++ logical router port with the match inport == LRP ++ && outport == GW_PORT && ++ REGBIT_PKT_LARGER, where LRP is the logical ++ router port and GW_PORT is the distributed gateway router ++ port and applies the following action for ipv4 and ipv6 respectively: +

            + +
            +@@ -2941,6 +2941,18 @@ icmp4 {
            +     REGBIT_EGRESS_LOOPBACK = 1;
            +     next(pipeline=ingress, table=0);
            + };
            ++
            ++icmp6_error {
            ++    icmp6.type = 2;
            ++    icmp6.code = 0;
            ++    icmp6.frag_mtu = M;
            ++    eth.dst = E;
            ++    ip6.dst = ip6.src;
            ++    ip6.src = I;
            ++    ip.ttl = 255;
            ++    REGBIT_EGRESS_LOOPBACK = 1;
            ++    next(pipeline=ingress, table=0);
            ++};
            +     
            + +
              +@@ -2956,7 +2968,7 @@ icmp4 { + + +
            • +- I is the IPv4 address of the logical router port. ++ I is the IPv4/IPv6 address of the logical router port. +
            • +
            + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 2b1257114..6375aee8d 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -10495,8 +10495,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + } + + ds_clear(&match); +- ds_put_format(&match, "outport == %s && ip4", +- od->l3dgw_port->json_key); ++ ds_put_format(&match, "outport == %s", od->l3dgw_port->json_key); + + ds_clear(&actions); + ds_put_format(&actions, +@@ -10509,34 +10508,65 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + for (size_t i = 0; i < od->nbr->n_ports; i++) { + struct ovn_port *rp = ovn_port_find(ports, + od->nbr->ports[i]->name); +- if (!rp || rp == od->l3dgw_port || +- !rp->lrp_networks.ipv4_addrs) { ++ if (!rp || rp == od->l3dgw_port) { + continue; + } +- ds_clear(&match); +- ds_put_format(&match, "inport == %s && outport == %s && ip4 " +- "&& "REGBIT_PKT_LARGER, +- rp->json_key, od->l3dgw_port->json_key); + +- ds_clear(&actions); +- /* Set icmp4.frag_mtu to gw_mtu */ +- ds_put_format(&actions, +- "icmp4_error {" +- REGBIT_EGRESS_LOOPBACK" = 1; " +- "eth.dst = %s; " +- "ip4.dst = ip4.src; " +- "ip4.src = %s; " +- "ip.ttl = 255; " +- "icmp4.type = 3; /* Destination Unreachable. */ " +- "icmp4.code = 4; /* Frag Needed and DF was Set. 
*/ " +- "icmp4.frag_mtu = %d; " +- "next(pipeline=ingress, table=0); };", +- rp->lrp_networks.ea_s, +- rp->lrp_networks.ipv4_addrs[0].addr_s, +- gw_mtu); +- ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_LARGER_PKTS, +- 50, ds_cstr(&match), ds_cstr(&actions), +- &rp->nbrp->header_); ++ if (rp->lrp_networks.ipv4_addrs) { ++ ds_clear(&match); ++ ds_put_format(&match, "inport == %s && outport == %s" ++ " && ip4 && "REGBIT_PKT_LARGER, ++ rp->json_key, od->l3dgw_port->json_key); ++ ++ ds_clear(&actions); ++ /* Set icmp4.frag_mtu to gw_mtu */ ++ ds_put_format(&actions, ++ "icmp4_error {" ++ REGBIT_EGRESS_LOOPBACK" = 1; " ++ "eth.dst = %s; " ++ "ip4.dst = ip4.src; " ++ "ip4.src = %s; " ++ "ip.ttl = 255; " ++ "icmp4.type = 3; /* Destination Unreachable. */ " ++ "icmp4.code = 4; /* Frag Needed and DF was Set. */ " ++ "icmp4.frag_mtu = %d; " ++ "next(pipeline=ingress, table=0); };", ++ rp->lrp_networks.ea_s, ++ rp->lrp_networks.ipv4_addrs[0].addr_s, ++ gw_mtu); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_LARGER_PKTS, 50, ++ ds_cstr(&match), ds_cstr(&actions), ++ &rp->nbrp->header_); ++ } ++ ++ if (rp->lrp_networks.ipv6_addrs) { ++ ds_clear(&match); ++ ds_put_format(&match, "inport == %s && outport == %s" ++ " && ip6 && "REGBIT_PKT_LARGER, ++ rp->json_key, od->l3dgw_port->json_key); ++ ++ ds_clear(&actions); ++ /* Set icmp6.frag_mtu to gw_mtu */ ++ ds_put_format(&actions, ++ "icmp6_error {" ++ REGBIT_EGRESS_LOOPBACK" = 1; " ++ "eth.dst = %s; " ++ "ip6.dst = ip6.src; " ++ "ip6.src = %s; " ++ "ip.ttl = 255; " ++ "icmp6.type = 2; /* Packet Too Big. 
*/ " ++ "icmp6.code = 0; " ++ "icmp6.frag_mtu = %d; " ++ "next(pipeline=ingress, table=0); };", ++ rp->lrp_networks.ea_s, ++ rp->lrp_networks.ipv6_addrs[0].addr_s, ++ gw_mtu); ++ ovn_lflow_add_with_hint(lflows, od, ++ S_ROUTER_IN_LARGER_PKTS, 50, ++ ds_cstr(&match), ds_cstr(&actions), ++ &rp->nbrp->header_); ++ } + } + } + } +diff --git a/tests/ovn.at b/tests/ovn.at +index 80cd62c49..905112a8d 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -15115,17 +15115,17 @@ ovn_start + + ovn-nbctl ls-add sw0 + ovn-nbctl lsp-add sw0 sw0-port1 +-ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.3" ++ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.3 1000::3" + + ovn-nbctl lr-add lr0 +-ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 ++ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::1/64 + ovn-nbctl lsp-add sw0 sw0-lr0 + ovn-nbctl lsp-set-type sw0-lr0 router + ovn-nbctl lsp-set-addresses sw0-lr0 router + ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 + + ovn-nbctl ls-add public +-ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 ++ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 2000::1/64 + ovn-nbctl lsp-add public public-lr0 + ovn-nbctl lsp-set-type public-lr0 router + ovn-nbctl lsp-set-addresses public-lr0 router +@@ -15139,6 +15139,7 @@ ovn-nbctl lsp-set-options ln-public network_name=phys + + ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20 + ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24 ++ovn-nbctl lr-nat-add lr0 snat 2000::1 1000::/64 + + net_add n1 + +@@ -15249,6 +15250,41 @@ test_ip_packet_larger() { + fi + } + ++test_ip6_packet_larger() { ++ local icmp_pmtu_reply_expected=$1 ++ ++ local eth_src=505400000001 ++ local eth_dst=00000000ff01 ++ ++ local ipv6_src=10000000000000000000000000000003 ++ local ipv6_dst=20000000000000000000000000000002 ++ local ipv6_rt=10000000000000000000000000000001 ++ ++ local payload=0000000000000000000000000000000000000000 ++ 
local payload=${payload}0000000000000000000000000000000000000000 ++ local payload=${payload}0000000000000000000000000000000000000000 ++ local payload=${payload}0000000000000000000000000000000000000000 ++ ++ local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst} ++ local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload} ++ ++ as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 ++ as hv1 reset_pcap_file hv1-vif1 hv1/vif1 ++ ++ # Send packet from sw0-port1 to outside ++ as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet ++ ++ if test $icmp_pmtu_reply_expected = 1; then ++ icmp6_reply=${eth_src}${eth_dst}86dd6000000000883afe ++ icmp6_reply=${icmp6_reply}${ipv6_rt}${ipv6_src}020041ff00000076 ++ icmp6_reply=${icmp6_reply}6000000000583afe${ipv6_src}${ipv6_dst} ++ icmp6_reply=${icmp6_reply}8000ec7662f00001${payload} ++ echo $icmp6_reply > hv1-vif1.expected ++ ++ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [hv1-vif1.expected]) ++ fi ++} ++ + ovn-nbctl show + ovn-sbctl show + +@@ -15283,6 +15319,12 @@ OVS_WAIT_UNTIL([ + # Now the packet should be sent via the localnet port to br-phys. + icmp_reply_expected=0 + test_ip_packet_larger $icmp_reply_expected ++ ++# Set the gateway mtu to 118 ++ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=118 ++icmp_reply_expected=1 ++test_ip6_packet_larger $icmp_reply_expected ++ + OVN_CLEANUP([hv1]) + AT_CLEANUP + +-- +2.26.2 + diff --git a/SOURCES/0009-Add-IP-address-normalization-to-ovn-utils.patch b/SOURCES/0009-Add-IP-address-normalization-to-ovn-utils.patch new file mode 100644 index 0000000..e7a69ce --- /dev/null +++ b/SOURCES/0009-Add-IP-address-normalization-to-ovn-utils.patch @@ -0,0 +1,126 @@ +From 6d0628f34e7462cbe7c580774b19644609757b85 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Mon, 20 Jul 2020 14:53:15 -0400 +Subject: [PATCH 09/22] Add IP address normalization to ovn-utils. + +This moves a couple of existing IP address normalization routines from +ovn-nbctl.c to ovn-util.c. 
It also adds a new normalization function for +a v46_address. This new function is not used in this commit but will be +used in a future commit in this series. + +Signed-off-by: Mark Michelson +Acked-by: Numan Siddique +--- + lib/ovn-util.c | 39 +++++++++++++++++++++++++++++++++++++++ + lib/ovn-util.h | 4 ++++ + utilities/ovn-nbctl.c | 29 ----------------------------- + 3 files changed, 43 insertions(+), 29 deletions(-) + +diff --git a/lib/ovn-util.c b/lib/ovn-util.c +index f09fdaffe..cdb5e18fb 100644 +--- a/lib/ovn-util.c ++++ b/lib/ovn-util.c +@@ -589,6 +589,45 @@ ip46_equals(const struct v46_ip *addr1, const struct v46_ip *addr2) + IN6_ARE_ADDR_EQUAL(&addr1->ipv6, &addr2->ipv6))); + } + ++/* The caller must free the returned string. */ ++char * ++normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen) ++{ ++ ovs_be32 network = ipv4 & be32_prefix_mask(plen); ++ if (plen == 32) { ++ return xasprintf(IP_FMT, IP_ARGS(network)); ++ } else { ++ return xasprintf(IP_FMT "/%d", IP_ARGS(network), plen); ++ } ++} ++ ++/* The caller must free the returned string. 
*/ ++char * ++normalize_ipv6_prefix(struct in6_addr ipv6, unsigned int plen) ++{ ++ char network_s[INET6_ADDRSTRLEN]; ++ ++ struct in6_addr mask = ipv6_create_mask(plen); ++ struct in6_addr network = ipv6_addr_bitand(&ipv6, &mask); ++ ++ inet_ntop(AF_INET6, &network, network_s, INET6_ADDRSTRLEN); ++ if (plen == 128) { ++ return xasprintf("%s", network_s); ++ } else { ++ return xasprintf("%s/%d", network_s, plen); ++ } ++} ++ ++char * ++normalize_v46_prefix(const struct v46_ip *prefix, unsigned int plen) ++{ ++ if (prefix->family == AF_INET) { ++ return normalize_ipv4_prefix(prefix->ipv4, plen); ++ } else { ++ return normalize_ipv6_prefix(prefix->ipv6, plen); ++ } ++} ++ + char * + str_tolower(const char *orig) + { +diff --git a/lib/ovn-util.h b/lib/ovn-util.h +index 4e08ee01e..0f7b501f1 100644 +--- a/lib/ovn-util.h ++++ b/lib/ovn-util.h +@@ -144,6 +144,10 @@ bool ip46_parse_cidr(const char *str, struct v46_ip *prefix, + unsigned int *plen); + bool ip46_equals(const struct v46_ip *addr1, const struct v46_ip *addr2); + ++char *normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen); ++char *normalize_ipv6_prefix(struct in6_addr ipv6, unsigned int plen); ++char *normalize_v46_prefix(const struct v46_ip *prefix, unsigned int plen); ++ + /* Returns a lowercase copy of orig. + * Caller must free the returned string. + */ +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 7578b9928..0079ad5a6 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -3482,35 +3482,6 @@ nbctl_dhcp_options_list(struct ctl_context *ctx) + free(nodes); + } + +-/* The caller must free the returned string. */ +-static char * +-normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen) +-{ +- ovs_be32 network = ipv4 & be32_prefix_mask(plen); +- if (plen == 32) { +- return xasprintf(IP_FMT, IP_ARGS(network)); +- } else { +- return xasprintf(IP_FMT"/%d", IP_ARGS(network), plen); +- } +-} +- +-/* The caller must free the returned string. 
*/ +-static char * +-normalize_ipv6_prefix(struct in6_addr ipv6, unsigned int plen) +-{ +- char network_s[INET6_ADDRSTRLEN]; +- +- struct in6_addr mask = ipv6_create_mask(plen); +- struct in6_addr network = ipv6_addr_bitand(&ipv6, &mask); +- +- inet_ntop(AF_INET6, &network, network_s, INET6_ADDRSTRLEN); +- if (plen == 128) { +- return xasprintf("%s", network_s); +- } else { +- return xasprintf("%s/%d", network_s, plen); +- } +-} +- + static char * + normalize_ipv4_prefix_str(const char *orig_prefix) + { +-- +2.26.2 + diff --git a/SOURCES/0010-Don-t-check-for-writeability-of-rhs-during-assignmen.patch b/SOURCES/0010-Don-t-check-for-writeability-of-rhs-during-assignmen.patch new file mode 100644 index 0000000..90d5f06 --- /dev/null +++ b/SOURCES/0010-Don-t-check-for-writeability-of-rhs-during-assignmen.patch @@ -0,0 +1,31 @@ +From 5b5b130408eebf999edacfe298c2fedc40d6e603 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Mon, 27 Jul 2020 16:04:06 -0400 +Subject: [PATCH 10/22] Don't check for writeability of rhs during assignment. + +The only condition under which the right-hand side of an assignment +needs to be checked for writeability is if it is an exchange (<->) +operation. 
+ +Signed-off-by: Mark Michelson +Acked-by: Numan Siddique +--- + lib/actions.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/actions.c b/lib/actions.c +index 1deeef799..23e334404 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -515,7 +515,7 @@ parse_assignment_action(struct action_context *ctx, bool exchange, + + char *error = expr_type_check(lhs, lhs->n_bits, true); + if (!error) { +- error = expr_type_check(&rhs, rhs.n_bits, true); ++ error = expr_type_check(&rhs, rhs.n_bits, exchange); + } + if (error) { + lexer_error(ctx->lexer, "%s", error); +-- +2.26.2 + diff --git a/SOURCES/0011-Add-expression-writeability-scopes.patch b/SOURCES/0011-Add-expression-writeability-scopes.patch new file mode 100644 index 0000000..c5d0f1c --- /dev/null +++ b/SOURCES/0011-Add-expression-writeability-scopes.patch @@ -0,0 +1,423 @@ +From 2bbba1279ae7b197d1b21dfb2ef7e45d0bcd8000 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Mon, 27 Jul 2020 16:08:21 -0400 +Subject: [PATCH 11/22] Add expression writeability scopes. + +Logical fields are defined as either being writeable or read-only. There +is no way to make fields writeable only in specific scenarios. + +This commit changes the boolean writeability field to a field of flags +indicating contexts where a field is writeable. Any time that nested +actions are used (i.e. actions enclosed in curly braces), a new scope +may be set for the nested action. For this particular commit, no +functionality is changed, and only a "default" scope is added +that mirrors the current setup. A future commit will make use of this +feature. 
+ +Change-Id: Id7a8dbedb862e8274c70597251233eeb35f81af6 +Signed-off-by: Mark Michelson +Acked-by: Numan Siddique +--- + include/ovn/expr.h | 52 ++++++++++++++++++++++++++++++++++------------ + lib/actions.c | 46 +++++++++++++++++++++------------------- + lib/expr.c | 35 ++++++++++++++++++------------- + 3 files changed, 84 insertions(+), 49 deletions(-) + +diff --git a/include/ovn/expr.h b/include/ovn/expr.h +index 9838251c1..11bfdad5b 100644 +--- a/include/ovn/expr.h ++++ b/include/ovn/expr.h +@@ -83,6 +83,10 @@ enum expr_level { + EXPR_L_ORDINAL + }; + ++enum expr_write_scope { ++ WR_DEFAULT = (1 << 0), /* Writeable at "global" level */ ++}; ++ + const char *expr_level_to_string(enum expr_level); + + /* A symbol. +@@ -255,7 +259,8 @@ struct expr_symbol { + + char *prereqs; + bool must_crossproduct; +- bool rw; ++ enum expr_write_scope rw; /* Bit map indicating in which nested contexts ++ * the symbol is writeable */ + }; + + void expr_symbol_format(const struct expr_symbol *, struct ds *); +@@ -273,20 +278,40 @@ bool expr_field_parse(struct lexer *, const struct shash *symtab, + struct expr_field *, struct expr **prereqsp); + void expr_field_format(const struct expr_field *, struct ds *); + +-struct expr_symbol *expr_symtab_add_field(struct shash *symtab, +- const char *name, enum mf_field_id, +- const char *prereqs, +- bool must_crossproduct); +-struct expr_symbol *expr_symtab_add_subfield(struct shash *symtab, +- const char *name, +- const char *prereqs, +- const char *subfield); +-struct expr_symbol *expr_symtab_add_string(struct shash *symtab, +- const char *name, enum mf_field_id, +- const char *prereqs); ++struct expr_symbol *expr_symtab_add_field_scoped(struct shash *symtab, ++ const char *name, ++ enum mf_field_id, ++ const char *prereqs, ++ bool must_crossproduct, ++ enum expr_write_scope scope); ++ ++#define expr_symtab_add_field(SYMTAB, NAME, MF_FIELD_ID, PREREQS, \ ++ MUST_CROSSPRODUCT) \ ++ expr_symtab_add_field_scoped((SYMTAB), (NAME), 
(MF_FIELD_ID), (PREREQS), \ ++ (MUST_CROSSPRODUCT), WR_DEFAULT) ++ ++struct expr_symbol *expr_symtab_add_subfield_scoped(struct shash *symtab, ++ const char *name, const char *prereqs, const char *subfield, ++ enum expr_write_scope scope); ++ ++#define expr_symtab_add_subfield(SYMTAB, NAME, PREREQS, SUBFIELD) \ ++ expr_symtab_add_subfield_scoped((SYMTAB), (NAME), (PREREQS), \ ++ (SUBFIELD), WR_DEFAULT) ++ ++struct expr_symbol *expr_symtab_add_string_scoped(struct shash *symtab, ++ const char *name, ++ enum mf_field_id, ++ const char *prereqs, ++ enum expr_write_scope scope); ++ ++#define expr_symtab_add_string(SYMTAB, NAME, MF_FIELD_ID, PREREQS) \ ++ expr_symtab_add_string_scoped((SYMTAB), (NAME), (MF_FIELD_ID), (PREREQS), \ ++ WR_DEFAULT) ++ + struct expr_symbol *expr_symtab_add_predicate(struct shash *symtab, + const char *name, + const char *expansion); ++ + struct expr_symbol *expr_symtab_add_ovn_field(struct shash *symtab, + const char *name, + enum ovn_field_id id); +@@ -452,7 +477,8 @@ void expr_matches_print(const struct hmap *matches, FILE *); + + /* Action parsing helper. */ + +-char *expr_type_check(const struct expr_field *, int n_bits, bool rw) ++char *expr_type_check(const struct expr_field *, int n_bits, bool rw, ++ enum expr_write_scope scope) + OVS_WARN_UNUSED_RESULT; + struct mf_subfield expr_resolve_field(const struct expr_field *); + +diff --git a/lib/actions.c b/lib/actions.c +index 23e334404..460ab0cf5 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -195,6 +195,7 @@ struct action_context { + struct ofpbuf *ovnacts; /* Actions. */ + struct expr *prereqs; /* Prerequisites to apply to match. */ + int depth; /* Current nested action depth. 
*/ ++ enum expr_write_scope scope; /* Current writeability scope */ + }; + + static void parse_actions(struct action_context *, enum lex_type sentinel); +@@ -207,7 +208,7 @@ action_parse_field(struct action_context *ctx, + return false; + } + +- char *error = expr_type_check(f, n_bits, rw); ++ char *error = expr_type_check(f, n_bits, rw, ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -374,7 +375,7 @@ parse_LOAD(struct action_context *ctx, const struct expr_field *lhs) + + load->dst = *lhs; + +- char *error = expr_type_check(lhs, lhs->n_bits, true); ++ char *error = expr_type_check(lhs, lhs->n_bits, true, ctx->scope); + if (error) { + ctx->ovnacts->size = ofs; + lexer_error(ctx->lexer, "%s", error); +@@ -513,9 +514,9 @@ parse_assignment_action(struct action_context *ctx, bool exchange, + return; + } + +- char *error = expr_type_check(lhs, lhs->n_bits, true); ++ char *error = expr_type_check(lhs, lhs->n_bits, true, ctx->scope); + if (!error) { +- error = expr_type_check(&rhs, rhs.n_bits, exchange); ++ error = expr_type_check(&rhs, rhs.n_bits, exchange, ctx->scope); + } + if (error) { + lexer_error(ctx->lexer, "%s", error); +@@ -1186,7 +1187,8 @@ static void + parse_select_action(struct action_context *ctx, struct expr_field *res_field) + { + /* Check if the result field is modifiable. */ +- char *error = expr_type_check(res_field, res_field->n_bits, true); ++ char *error = expr_type_check(res_field, res_field->n_bits, true, ++ ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -1337,7 +1339,7 @@ encode_CT_CLEAR(const struct ovnact_null *null OVS_UNUSED, + * actions on a packet derived from the one being processed. 
*/ + static void + parse_nested_action(struct action_context *ctx, enum ovnact_type type, +- const char *prereq) ++ const char *prereq, enum expr_write_scope scope) + { + if (!lexer_force_match(ctx->lexer, LEX_T_LCURLY)) { + return; +@@ -1357,6 +1359,7 @@ parse_nested_action(struct action_context *ctx, enum ovnact_type type, + .ovnacts = &nested, + .prereqs = NULL, + .depth = ctx->depth + 1, ++ .scope = scope, + }; + parse_actions(&inner_ctx, LEX_T_RCURLY); + +@@ -1387,61 +1390,61 @@ parse_nested_action(struct action_context *ctx, enum ovnact_type type, + static void + parse_ARP(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ARP, "ip4"); ++ parse_nested_action(ctx, OVNACT_ARP, "ip4", ctx->scope); + } + + static void + parse_ICMP4(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ICMP4, "ip4"); ++ parse_nested_action(ctx, OVNACT_ICMP4, "ip4", ctx->scope); + } + + static void + parse_ICMP4_ERROR(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ICMP4_ERROR, "ip4"); ++ parse_nested_action(ctx, OVNACT_ICMP4_ERROR, "ip4", ctx->scope); + } + + static void + parse_ICMP6(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ICMP6, "ip6"); ++ parse_nested_action(ctx, OVNACT_ICMP6, "ip6", ctx->scope); + } + + static void + parse_ICMP6_ERROR(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ICMP6_ERROR, "ip6"); ++ parse_nested_action(ctx, OVNACT_ICMP6_ERROR, "ip6", ctx->scope); + } + + static void + parse_TCP_RESET(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_TCP_RESET, "tcp"); ++ parse_nested_action(ctx, OVNACT_TCP_RESET, "tcp", ctx->scope); + } + + static void + parse_ND_NA(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ND_NA, "nd_ns"); ++ parse_nested_action(ctx, OVNACT_ND_NA, "nd_ns", ctx->scope); + } + + static void + parse_ND_NA_ROUTER(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ND_NA_ROUTER, "nd_ns"); ++ 
parse_nested_action(ctx, OVNACT_ND_NA_ROUTER, "nd_ns", ctx->scope); + } + + static void + parse_ND_NS(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_ND_NS, "ip6"); ++ parse_nested_action(ctx, OVNACT_ND_NS, "ip6", ctx->scope); + } + + static void + parse_CLONE(struct action_context *ctx) + { +- parse_nested_action(ctx, OVNACT_CLONE, NULL); ++ parse_nested_action(ctx, OVNACT_CLONE, NULL, WR_DEFAULT); + } + + static void +@@ -1947,7 +1950,7 @@ parse_lookup_mac_bind(struct action_context *ctx, + struct ovnact_lookup_mac_bind *lookup_mac) + { + /* Validate that the destination is a 1-bit, modifiable field. */ +- char *error = expr_type_check(dst, 1, true); ++ char *error = expr_type_check(dst, 1, true, ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -2053,7 +2056,7 @@ parse_lookup_mac_bind_ip(struct action_context *ctx, + struct ovnact_lookup_mac_bind_ip *lookup_mac) + { + /* Validate that the destination is a 1-bit, modifiable field. */ +- char *error = expr_type_check(dst, 1, true); ++ char *error = expr_type_check(dst, 1, true, ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -2283,7 +2286,7 @@ parse_put_opts(struct action_context *ctx, const struct expr_field *dst, + lexer_get(ctx->lexer); /* Skip '('. */ + + /* Validate that the destination is a 1-bit, modifiable field. */ +- char *error = expr_type_check(dst, 1, true); ++ char *error = expr_type_check(dst, 1, true, ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -2680,7 +2683,7 @@ parse_dns_lookup(struct action_context *ctx, const struct expr_field *dst, + return; + } + /* Validate that the destination is a 1-bit, modifiable field. 
*/ +- char *error = expr_type_check(dst, 1, true); ++ char *error = expr_type_check(dst, 1, true, ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -3205,7 +3208,7 @@ parse_check_pkt_larger(struct action_context *ctx, + struct ovnact_check_pkt_larger *cipl) + { + /* Validate that the destination is a 1-bit, modifiable field. */ +- char *error = expr_type_check(dst, 1, true); ++ char *error = expr_type_check(dst, 1, true, ctx->scope); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); +@@ -3677,6 +3680,7 @@ ovnacts_parse(struct lexer *lexer, const struct ovnact_parse_params *pp, + .lexer = lexer, + .ovnacts = ovnacts, + .prereqs = NULL, ++ .scope = WR_DEFAULT, + }; + if (!lexer->error) { + parse_actions(&ctx, LEX_T_END); +diff --git a/lib/expr.c b/lib/expr.c +index 497b2accc..c07e7dd4d 100644 +--- a/lib/expr.c ++++ b/lib/expr.c +@@ -1447,7 +1447,7 @@ expr_symbol_format(const struct expr_symbol *symbol, struct ds *s) + static struct expr_symbol * + add_symbol(struct shash *symtab, const char *name, int width, + const char *prereqs, enum expr_level level, +- bool must_crossproduct, bool rw) ++ bool must_crossproduct, enum expr_write_scope rw) + { + struct expr_symbol *symbol = xzalloc(sizeof *symbol); + symbol->name = xstrdup(name); +@@ -1471,9 +1471,10 @@ add_symbol(struct shash *symtab, const char *name, int width, + * Use subfields to duplicate or subset a field (you can even make a subfield + * include all the bits of the "parent" field if you like). 
*/ + struct expr_symbol * +-expr_symtab_add_field(struct shash *symtab, const char *name, +- enum mf_field_id id, const char *prereqs, +- bool must_crossproduct) ++expr_symtab_add_field_scoped(struct shash *symtab, const char *name, ++ enum mf_field_id id, const char *prereqs, ++ bool must_crossproduct, ++ enum expr_write_scope scope) + { + const struct mf_field *field = mf_from_id(id); + struct expr_symbol *symbol; +@@ -1482,7 +1483,8 @@ expr_symtab_add_field(struct shash *symtab, const char *name, + (field->maskable == MFM_FULLY + ? EXPR_L_ORDINAL + : EXPR_L_NOMINAL), +- must_crossproduct, field->writable); ++ must_crossproduct, ++ field->writable ? scope : 0); + symbol->field = field; + return symbol; + } +@@ -1511,8 +1513,9 @@ parse_field_from_string(const char *s, const struct shash *symtab, + * 'subfield' must describe the subfield as a string, e.g. "vlan.tci[0..11]" + * for the low 12 bits of a larger field named "vlan.tci". */ + struct expr_symbol * +-expr_symtab_add_subfield(struct shash *symtab, const char *name, +- const char *prereqs, const char *subfield) ++expr_symtab_add_subfield_scoped(struct shash *symtab, const char *name, ++ const char *prereqs, const char *subfield, ++ enum expr_write_scope scope) + { + struct expr_symbol *symbol; + struct expr_field f; +@@ -1531,7 +1534,7 @@ expr_symtab_add_subfield(struct shash *symtab, const char *name, + } + + symbol = add_symbol(symtab, name, f.n_bits, prereqs, level, false, +- f.symbol->rw); ++ f.symbol->rw ? scope : 0); + symbol->parent = f.symbol; + symbol->parent_ofs = f.ofs; + return symbol; +@@ -1540,14 +1543,15 @@ expr_symtab_add_subfield(struct shash *symtab, const char *name, + /* Adds a string-valued symbol named 'name' to 'symtab' with the specified + * 'prereqs'. 
*/ + struct expr_symbol * +-expr_symtab_add_string(struct shash *symtab, const char *name, +- enum mf_field_id id, const char *prereqs) ++expr_symtab_add_string_scoped(struct shash *symtab, const char *name, ++ enum mf_field_id id, const char *prereqs, ++ enum expr_write_scope scope) + { + const struct mf_field *field = mf_from_id(id); + struct expr_symbol *symbol; + + symbol = add_symbol(symtab, name, 0, prereqs, EXPR_L_NOMINAL, false, +- field->writable); ++ field->writable ? scope : 0); + symbol->field = field; + return symbol; + } +@@ -1610,7 +1614,7 @@ expr_symtab_add_predicate(struct shash *symtab, const char *name, + return NULL; + } + +- symbol = add_symbol(symtab, name, 1, NULL, level, false, false); ++ symbol = add_symbol(symtab, name, 1, NULL, level, false, 0); + symbol->predicate = xstrdup(expansion); + return symbol; + } +@@ -1623,7 +1627,7 @@ expr_symtab_add_ovn_field(struct shash *symtab, const char *name, + struct expr_symbol *symbol; + + symbol = add_symbol(symtab, name, ovn_field->n_bits, NULL, +- EXPR_L_NOMINAL, false, true); ++ EXPR_L_NOMINAL, false, UINT32_MAX); + symbol->ovn_field = ovn_field; + return symbol; + } +@@ -3322,7 +3326,8 @@ expr_evaluate(const struct expr *e, const struct flow *uflow, + * if 'f' is acceptable, otherwise a malloc()'d error message that the caller + * must free(). 
*/ + char * OVS_WARN_UNUSED_RESULT +-expr_type_check(const struct expr_field *f, int n_bits, bool rw) ++expr_type_check(const struct expr_field *f, int n_bits, bool rw, ++ uint32_t write_scope) + { + if (n_bits != f->n_bits) { + if (n_bits && f->n_bits) { +@@ -3340,7 +3345,7 @@ expr_type_check(const struct expr_field *f, int n_bits, bool rw) + } + } + +- if (rw && !f->symbol->rw) { ++ if (rw && !(f->symbol->rw & write_scope)) { + return xasprintf("Field %s is not modifiable.", f->symbol->name); + } + +-- +2.26.2 + diff --git a/SOURCES/0012-Used-nested-actions-in-ct_commit.patch b/SOURCES/0012-Used-nested-actions-in-ct_commit.patch new file mode 100644 index 0000000..4c8db8e --- /dev/null +++ b/SOURCES/0012-Used-nested-actions-in-ct_commit.patch @@ -0,0 +1,401 @@ +From b74a558d0a5e4c217f966d9615611e3dca1d6c23 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Mon, 27 Jul 2020 16:11:56 -0400 +Subject: [PATCH 12/22] Used nested actions in ct_commit + +ct_commit allows for ct_label and ct_mark to be set within. However, +there are some restrictions with the current implementation: + +* It is not possible to address the indiviual bits within the ct_mark or + ct_label. +* It is not possible to set these to the value of a register. Only + explicit integer setting can be used. + +With this change, ct_commit now can have arbitrary nested actions +inside. This makes it similar to how the "exec" option works in OVS's +ct() action. + +ct_commit now also sets a writeability scope so that ct_mark and +ct_label are the only symbols that are writeable. The positive side +effect is that ct_mark and ct_label are no longer writeable except for +inside ct_commit. + +In this commit, the only noticeable effect is that it allows for +slightly more expressive setting of ct_label.blocked. A future commit +will take further advantage of this. 
+ +Signed-off-by: Mark Michelson +--- + include/ovn/actions.h | 9 +--- + include/ovn/expr.h | 1 + + lib/actions.c | 110 +++++++----------------------------------- + lib/logical-fields.c | 9 ++-- + northd/ovn-northd.c | 8 +-- + ovn-sb.xml | 11 +++-- + tests/ovn.at | 59 ++++++++++++---------- + 7 files changed, 72 insertions(+), 135 deletions(-) + +diff --git a/include/ovn/actions.h b/include/ovn/actions.h +index 34ba0d880..636cb4bc1 100644 +--- a/include/ovn/actions.h ++++ b/include/ovn/actions.h +@@ -57,7 +57,7 @@ struct ovn_extend_table; + OVNACT(EXCHANGE, ovnact_move) \ + OVNACT(DEC_TTL, ovnact_null) \ + OVNACT(CT_NEXT, ovnact_ct_next) \ +- OVNACT(CT_COMMIT, ovnact_ct_commit) \ ++ OVNACT(CT_COMMIT, ovnact_nest) \ + OVNACT(CT_DNAT, ovnact_ct_nat) \ + OVNACT(CT_SNAT, ovnact_ct_nat) \ + OVNACT(CT_LB, ovnact_ct_lb) \ +@@ -222,13 +222,6 @@ struct ovnact_ct_next { + uint8_t ltable; /* Logical table ID of next table. */ + }; + +-/* OVNACT_CT_COMMIT. */ +-struct ovnact_ct_commit { +- struct ovnact ovnact; +- uint32_t ct_mark, ct_mark_mask; +- ovs_be128 ct_label, ct_label_mask; +-}; +- + /* OVNACT_CT_DNAT, OVNACT_CT_SNAT. 
*/ + struct ovnact_ct_nat { + struct ovnact ovnact; +diff --git a/include/ovn/expr.h b/include/ovn/expr.h +index 11bfdad5b..b34fb0e81 100644 +--- a/include/ovn/expr.h ++++ b/include/ovn/expr.h +@@ -85,6 +85,7 @@ enum expr_level { + + enum expr_write_scope { + WR_DEFAULT = (1 << 0), /* Writeable at "global" level */ ++ WR_CT_COMMIT = (1 << 1), /* Writeable in "ct_commit" action */ + }; + + const char *expr_level_to_string(enum expr_level); +diff --git a/lib/actions.c b/lib/actions.c +index 460ab0cf5..79ac79a95 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -200,6 +200,15 @@ struct action_context { + + static void parse_actions(struct action_context *, enum lex_type sentinel); + ++static void parse_nested_action(struct action_context *ctx, ++ enum ovnact_type type, ++ const char *prereq, ++ enum expr_write_scope scope); ++ ++static void format_nested_action(const struct ovnact_nest *on, ++ const char *name, ++ struct ds *s); ++ + static bool + action_parse_field(struct action_context *ctx, + int n_bits, bool rw, struct expr_field *f) +@@ -618,125 +627,42 @@ ovnact_ct_next_free(struct ovnact_ct_next *a OVS_UNUSED) + { + } + +-static void +-parse_ct_commit_arg(struct action_context *ctx, +- struct ovnact_ct_commit *cc) +-{ +- if (lexer_match_id(ctx->lexer, "ct_mark")) { +- if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { +- return; +- } +- if (ctx->lexer->token.type == LEX_T_INTEGER) { +- cc->ct_mark = ntohll(ctx->lexer->token.value.integer); +- cc->ct_mark_mask = UINT32_MAX; +- } else if (ctx->lexer->token.type == LEX_T_MASKED_INTEGER) { +- cc->ct_mark = ntohll(ctx->lexer->token.value.integer); +- cc->ct_mark_mask = ntohll(ctx->lexer->token.mask.integer); +- } else { +- lexer_syntax_error(ctx->lexer, "expecting integer"); +- return; +- } +- lexer_get(ctx->lexer); +- } else if (lexer_match_id(ctx->lexer, "ct_label")) { +- if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { +- return; +- } +- if (ctx->lexer->token.type == LEX_T_INTEGER) { +- cc->ct_label = 
ctx->lexer->token.value.be128_int; +- cc->ct_label_mask = OVS_BE128_MAX; +- } else if (ctx->lexer->token.type == LEX_T_MASKED_INTEGER) { +- cc->ct_label = ctx->lexer->token.value.be128_int; +- cc->ct_label_mask = ctx->lexer->token.mask.be128_int; +- } else { +- lexer_syntax_error(ctx->lexer, "expecting integer"); +- return; +- } +- lexer_get(ctx->lexer); +- } else { +- lexer_syntax_error(ctx->lexer, NULL); +- } +-} +- + static void + parse_CT_COMMIT(struct action_context *ctx) + { +- add_prerequisite(ctx, "ip"); + +- struct ovnact_ct_commit *ct_commit = ovnact_put_CT_COMMIT(ctx->ovnacts); +- if (lexer_match(ctx->lexer, LEX_T_LPAREN)) { +- while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { +- parse_ct_commit_arg(ctx, ct_commit); +- if (ctx->lexer->error) { +- return; +- } +- lexer_match(ctx->lexer, LEX_T_COMMA); +- } +- } ++ parse_nested_action(ctx, OVNACT_CT_COMMIT, "ip", ++ WR_CT_COMMIT); + } + + static void +-format_CT_COMMIT(const struct ovnact_ct_commit *cc, struct ds *s) ++format_CT_COMMIT(const struct ovnact_nest *on, struct ds *s) + { +- ds_put_cstr(s, "ct_commit("); +- if (cc->ct_mark_mask) { +- ds_put_format(s, "ct_mark=%#"PRIx32, cc->ct_mark); +- if (cc->ct_mark_mask != UINT32_MAX) { +- ds_put_format(s, "/%#"PRIx32, cc->ct_mark_mask); +- } +- } +- if (!ovs_be128_is_zero(cc->ct_label_mask)) { +- if (ds_last(s) != '(') { +- ds_put_cstr(s, ", "); +- } +- +- ds_put_format(s, "ct_label="); +- ds_put_hex(s, &cc->ct_label, sizeof cc->ct_label); +- if (!ovs_be128_equals(cc->ct_label_mask, OVS_BE128_MAX)) { +- ds_put_char(s, '/'); +- ds_put_hex(s, &cc->ct_label_mask, sizeof cc->ct_label_mask); +- } +- } +- if (!ds_chomp(s, '(')) { +- ds_put_char(s, ')'); +- } +- ds_put_char(s, ';'); ++ format_nested_action(on, "ct_commit", s); + } + + static void +-encode_CT_COMMIT(const struct ovnact_ct_commit *cc, ++encode_CT_COMMIT(const struct ovnact_nest *on, + const struct ovnact_encode_params *ep OVS_UNUSED, + struct ofpbuf *ofpacts) + { + struct ofpact_conntrack *ct = 
ofpact_put_CT(ofpacts); + ct->flags = NX_CT_F_COMMIT; + ct->recirc_table = NX_CT_RECIRC_NONE; +- ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE); ++ ct->zone_src.field = ep->is_switch ++ ? mf_from_id(MFF_LOG_CT_ZONE) ++ : mf_from_id(MFF_LOG_DNAT_ZONE); + ct->zone_src.ofs = 0; + ct->zone_src.n_bits = 16; + + size_t set_field_offset = ofpacts->size; + ofpbuf_pull(ofpacts, set_field_offset); + +- if (cc->ct_mark_mask) { +- const ovs_be32 value = htonl(cc->ct_mark); +- const ovs_be32 mask = htonl(cc->ct_mark_mask); +- ofpact_put_set_field(ofpacts, mf_from_id(MFF_CT_MARK), &value, &mask); +- } +- +- if (!ovs_be128_is_zero(cc->ct_label_mask)) { +- ofpact_put_set_field(ofpacts, mf_from_id(MFF_CT_LABEL), &cc->ct_label, +- &cc->ct_label_mask); +- } +- ++ ovnacts_encode(on->nested, on->nested_len, ep, ofpacts); + ofpacts->header = ofpbuf_push_uninit(ofpacts, set_field_offset); + ct = ofpacts->header; + ofpact_finish(ofpacts, &ct->ofpact); + } +- +-static void +-ovnact_ct_commit_free(struct ovnact_ct_commit *cc OVS_UNUSED) +-{ +-} + + static void + parse_ct_nat(struct action_context *ctx, const char *name, +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index 8639523ea..fde53a47e 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -123,10 +123,13 @@ ovn_init_symtab(struct shash *symtab) + flags_str); + + /* Connection tracking state. 
*/ +- expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false); ++ expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false, ++ WR_CT_COMMIT); + +- expr_symtab_add_field(symtab, "ct_label", MFF_CT_LABEL, NULL, false); +- expr_symtab_add_subfield(symtab, "ct_label.blocked", NULL, "ct_label[0]"); ++ expr_symtab_add_field_scoped(symtab, "ct_label", MFF_CT_LABEL, NULL, false, ++ WR_CT_COMMIT); ++ expr_symtab_add_subfield_scoped(symtab, "ct_label.blocked", NULL, ++ "ct_label[0]", WR_CT_COMMIT); + + expr_symtab_add_field(symtab, "ct_state", MFF_CT_STATE, NULL, false); + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 6375aee8d..44e7d9365 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -5356,7 +5356,7 @@ consider_acl(struct hmap *lflows, struct ovn_datapath *od, + ds_clear(&match); + ds_clear(&actions); + ds_put_cstr(&match, "ct.est && ct_label.blocked == 0"); +- ds_put_cstr(&actions, "ct_commit(ct_label=1/1); "); ++ ds_put_cstr(&actions, "ct_commit { ct_label.blocked = 1; }; "); + if (!strcmp(acl->action, "reject")) { + build_reject_acl_rules(od, lflows, stage, acl, &match, + &actions, &acl->header_); +@@ -5880,9 +5880,11 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs) + * any packet that makes it this far is part of a connection we + * want to allow to continue. */ + ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, +- REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); ++ REGBIT_CONNTRACK_COMMIT" == 1", ++ "ct_commit { ct_label.blocked = 0; }; next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, +- REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); ++ REGBIT_CONNTRACK_COMMIT" == 1", ++ "ct_commit { ct_label.blocked = 0; }; next;"); + + /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent + * through nat (without committing). 
+diff --git a/ovn-sb.xml b/ovn-sb.xml +index fc39b2d03..a74d9c3ea 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -1261,10 +1261,10 @@ +

            + + +-
            ct_commit;
            +-
            ct_commit(ct_mark=value[/mask]);
            +-
            ct_commit(ct_label=value[/mask]);
            +-
            ct_commit(ct_mark=value[/mask], ct_label=value[/mask]);
            ++
            ct_commit { };
            ++
            ct_commit { ct_mark=value[/mask]; };
            ++
            ct_commit { ct_label=value[/mask]; };
            ++
            ct_commit { ct_mark=value[/mask]; ct_label=value[/mask]; };
            +
            +

            + Commit the flow to the connection tracking entry associated with it +@@ -1276,6 +1276,9 @@ + tracking entry. ct_mark is a 32-bit field. + ct_label is a 128-bit field. The value[/mask] + should be specified in hex string if more than 64bits are to be used. ++ Registers and other named fields can be used for value. ++ ct_mark and ct_label may be sub-addressed ++ in order to have specific bits set. +

            + +

            +diff --git a/tests/ovn.at b/tests/ovn.at +index 905112a8d..4c68b77d8 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1045,51 +1045,60 @@ ct_next; + has prereqs ip + + # ct_commit +-ct_commit; ++ct_commit { }; ++ formats as ct_commit { drop; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15]) + has prereqs ip +-ct_commit(); +- formats as ct_commit; +- encodes as ct(commit,zone=NXM_NX_REG13[0..15]) +- has prereqs ip +-ct_commit(ct_mark=1); +- formats as ct_commit(ct_mark=0x1); ++ct_commit { ct_mark=1; }; ++ formats as ct_commit { ct_mark = 1; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1->ct_mark)) + has prereqs ip +-ct_commit(ct_mark=1/1); +- formats as ct_commit(ct_mark=0x1/0x1); ++ct_commit { ct_mark=1/1; }; ++ formats as ct_commit { ct_mark = 1/1; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1/0x1->ct_mark)) + has prereqs ip +-ct_commit(ct_label=1); +- formats as ct_commit(ct_label=0x1); ++ct_commit { ct_label=1; }; ++ formats as ct_commit { ct_label = 1; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1->ct_label)) + has prereqs ip +-ct_commit(ct_label=1/1); +- formats as ct_commit(ct_label=0x1/0x1); ++ct_commit { ct_label=1/1; }; ++ formats as ct_commit { ct_label = 1/1; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1/0x1->ct_label)) + has prereqs ip +-ct_commit(ct_mark=1, ct_label=2); +- formats as ct_commit(ct_mark=0x1, ct_label=0x2); ++ct_commit { ct_mark=1; ct_label=2; }; ++ formats as ct_commit { ct_mark = 1; ct_label = 2; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1->ct_mark,set_field:0x2->ct_label)) + has prereqs ip + +-ct_commit(ct_label=0x01020304050607080910111213141516); +- formats as ct_commit(ct_label=0x1020304050607080910111213141516); ++ct_commit { ct_label=0x01020304050607080910111213141516; }; ++ formats as ct_commit { ct_label = 0x1020304050607080910111213141516; }; + encodes as 
ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1020304050607080910111213141516->ct_label)) + has prereqs ip +-ct_commit(ct_label=0x181716151413121110090807060504030201); +- formats as ct_commit(ct_label=0x16151413121110090807060504030201); +- encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x16151413121110090807060504030201->ct_label)) +- has prereqs ip +-ct_commit(ct_label=0x1000000000000000000000000000000/0x1000000000000000000000000000000); ++ct_commit { ct_label=0x1000000000000000000000000000000/0x1000000000000000000000000000000; }; ++ formats as ct_commit { ct_label = 0x1000000000000000000000000000000/0x1000000000000000000000000000000; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0x1000000000000000000000000000000/0x1000000000000000000000000000000->ct_label)) + has prereqs ip +-ct_commit(ct_label=18446744073709551615); +- formats as ct_commit(ct_label=0xffffffffffffffff); ++ct_commit { ct_label=18446744073709551615; }; ++ formats as ct_commit { ct_label = 18446744073709551615; }; + encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0xffffffffffffffff->ct_label)) + has prereqs ip +-ct_commit(ct_label=18446744073709551616); ++ct_commit { ct_label[0..47] = 0x00000f040201; ct_label[48..63] = 0x0002; }; ++ formats as ct_commit { ct_label[0..47] = 0xf040201; ct_label[48..63] = 0x2; }; ++ encodes as ct(commit,zone=NXM_NX_REG13[0..15],exec(set_field:0xf040201/0xffffffffffff->ct_label,set_field:0x2000000000000/0xffff000000000000->ct_label)) ++ has prereqs ip ++ct_commit { ct_label=18446744073709551616; }; + Decimal constants must be less than 2**64. ++ct_commit { ct_label=0x181716151413121110090807060504030201; }; ++ 141-bit constant is not compatible with 128-bit field ct_label. ++ct_commit { ip4.dst = 192.168.0.1; }; ++ Field ip4.dst is not modifiable. ++ ++ct_mark = 12345 ++ Field ct_mark is not modifiable. ++ct_label = 0xcafe ++ Field ct_label is not modifiable. 
++ct_label.blocked = 1/1 ++ Field ct_label.blocked is not modifiable. + + # ct_dnat + ct_dnat; +-- +2.26.2 + diff --git a/SOURCES/0013-Add-ECMP-symmetric-replies.patch b/SOURCES/0013-Add-ECMP-symmetric-replies.patch new file mode 100644 index 0000000..6a0a858 --- /dev/null +++ b/SOURCES/0013-Add-ECMP-symmetric-replies.patch @@ -0,0 +1,922 @@ +From 750e47ec508977af7bb37e9d0c98dd13984e9002 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Mon, 20 Jul 2020 15:01:32 -0400 +Subject: [PATCH 13/22] Add ECMP symmetric replies. + +When traffic arrives over an ECMP route, there is no guarantee that the +reply traffic will egress over the same route. Sometimes, the nature of +the traffic (or the intervening equipment) means that it is important +for reply traffic to go out the same route it came in. + +This commit introduces optional ECMP symmetric reply behavior. If +configured, then traffic to or from the ECMP route will be sent to +conntrack. New incoming traffic over the route will have the source MAC +address and incoming port saved in the ct_label. Reply traffic then uses +this saved information to send the packet back out the same way it came +in. + +To facilitate this, a new table was added to the ingress logical router +pipeline. The ECMP_STATEFUL table is responsible for committing to +conntrack and setting the ct_label when it detects new incoming traffic +from the route. + +Since ingress pipeline logic on the logical router depends on ct state +of a particular hypervisor, this feature is only usable on gateway +routers. 
+ +Change-Id: I6e5177a6de2258286869114ab6b4028667fee009 +Signed-off-by: Mark Michelson +Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1849683 +Acked-by: Numan Siddique +--- + lib/logical-fields.c | 4 + + northd/ovn-northd.8.xml | 49 +++++++++--- + northd/ovn-northd.c | 123 ++++++++++++++++++++++++++---- + ovn-architecture.7.xml | 7 +- + ovn-nb.ovsschema | 7 +- + ovn-nb.xml | 16 ++++ + tests/ovn.at | 152 ++++++++++++++++++++++++++++++++++---- + tests/system-ovn.at | 143 +++++++++++++++++++++++++++++++++++ + utilities/ovn-nbctl.8.xml | 31 ++++++-- + utilities/ovn-nbctl.c | 18 ++++- + 10 files changed, 496 insertions(+), 54 deletions(-) + +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index fde53a47e..15342dded 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -130,6 +130,10 @@ ovn_init_symtab(struct shash *symtab) + WR_CT_COMMIT); + expr_symtab_add_subfield_scoped(symtab, "ct_label.blocked", NULL, + "ct_label[0]", WR_CT_COMMIT); ++ expr_symtab_add_subfield_scoped(symtab, "ct_label.ecmp_reply_eth", NULL, ++ "ct_label[32..79]", WR_CT_COMMIT); ++ expr_symtab_add_subfield_scoped(symtab, "ct_label.ecmp_reply_port", NULL, ++ "ct_label[80..95]", WR_CT_COMMIT); + + expr_symtab_add_field(symtab, "ct_state", MFF_CT_STATE, NULL, false); + +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index 623768988..f35a035fd 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -2175,15 +2175,31 @@ icmp6 { +

            + This is to send packets to connection tracker for tracking and + defragmentation. It contains a priority-0 flow that simply moves traffic +- to the next table. If load balancing rules with virtual IP addresses +- (and ports) are configured in OVN_Northbound database for a +- Gateway router, a priority-100 flow is added for each configured virtual +- IP address VIP. For IPv4 VIPs the flow matches +- ip && ip4.dst == VIP. For IPv6 +- VIPs, the flow matches ip && ip6.dst == +- VIP. The flow uses the action ct_next; +- to send IP packets to the connection tracker for packet de-fragmentation +- and tracking before sending it to the next table. ++ to the next table. ++

            ++ ++

            ++ If load balancing rules with virtual IP addresses (and ports) are ++ configured in OVN_Northbound database for a Gateway router, ++ a priority-100 flow is added for each configured virtual IP address ++ VIP. For IPv4 VIPs the flow matches ip ++ && ip4.dst == VIP. For IPv6 VIPs, ++ the flow matches ip && ip6.dst == VIP. ++ The flow uses the action ct_next; to send IP packets to the ++ connection tracker for packet de-fragmentation and tracking before ++ sending it to the next table. ++

            ++ ++

            ++ If ECMP routes with symmetric reply are configured in the ++ OVN_Northbound database for a gateway router, a priority-100 ++ flow is added for each router port on which symmetric replies are ++ configured. The matching logic for these ports essentially reverses the ++ configured logic of the ECMP route. So for instance, a route with a ++ destination routing policy will instead match if the source IP address ++ matches the static route's prefix. The flow uses the action ++ ct_next to send IP packets to the connection tracker for ++ packet de-fragmentation and tracking before sending it to the next table. +

            + +

            Ingress Table 5: UNSNAT

            +@@ -2544,7 +2560,15 @@ output; + table. This table, instead, is responsible for determine the ECMP + group id and select a member id within the group based on 5-tuple + hashing. It stores group id in reg8[0..15] and member id in +- reg8[16..31]. ++ reg8[16..31]. This step is skipped if the traffic going ++ out the ECMP route is reply traffic, and the ECMP route was configured ++ to use symmetric replies. Instead, the stored ct_label value ++ is used to choose the destination. Bits 32 through 79 of the ++ ct_label hold the destination MAC address to which the ++ packet should be sent. Bits 80 through 95 identify the logical router port on ++ which the packet should be sent. These values in the ++ ct_label are set when the initial ingress traffic is ++ received over the ECMP route. +

            + +

            +@@ -2694,6 +2718,11 @@ select(reg8[16..31], MID1, MID2, ...); + address and reg1 as the source protocol address). +

            + ++

            ++ This processing is skipped for reply traffic being sent out of an ECMP ++ route if the route was configured to use symmetric replies. ++

            ++ +

            + This table contains the following logical flows: +

            +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 44e7d9365..cb8e25bdf 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -171,16 +171,17 @@ enum ovn_stage { + PIPELINE_STAGE(ROUTER, IN, DEFRAG, 4, "lr_in_defrag") \ + PIPELINE_STAGE(ROUTER, IN, UNSNAT, 5, "lr_in_unsnat") \ + PIPELINE_STAGE(ROUTER, IN, DNAT, 6, "lr_in_dnat") \ +- PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 7, "lr_in_nd_ra_options") \ +- PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 8, "lr_in_nd_ra_response") \ +- PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 9, "lr_in_ip_routing") \ +- PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 10, "lr_in_ip_routing_ecmp") \ +- PIPELINE_STAGE(ROUTER, IN, POLICY, 11, "lr_in_policy") \ +- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 12, "lr_in_arp_resolve") \ +- PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 13, "lr_in_chk_pkt_len") \ +- PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 14,"lr_in_larger_pkts") \ +- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 15, "lr_in_gw_redirect") \ +- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 16, "lr_in_arp_request") \ ++ PIPELINE_STAGE(ROUTER, IN, ECMP_STATEFUL, 7, "lr_in_ecmp_stateful") \ ++ PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 8, "lr_in_nd_ra_options") \ ++ PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 9, "lr_in_nd_ra_response") \ ++ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \ ++ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \ ++ PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \ ++ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 13, "lr_in_arp_resolve") \ ++ PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 14, "lr_in_chk_pkt_len") \ ++ PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 15,"lr_in_larger_pkts") \ ++ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 16, "lr_in_gw_redirect") \ ++ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 17, "lr_in_arp_request") \ + \ + /* Logical router egress stages. 
*/ \ + PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ +@@ -7430,6 +7431,7 @@ struct parsed_route { + bool is_src_route; + uint32_t hash; + const struct nbrec_logical_router_static_route *route; ++ bool ecmp_symmetric_reply; + }; + + static uint32_t +@@ -7491,6 +7493,8 @@ parsed_routes_add(struct ovs_list *routes, + "src-ip")); + pr->hash = route_hash(pr); + pr->route = route; ++ pr->ecmp_symmetric_reply = smap_get_bool(&route->options, ++ "ecmp_symmetric_reply", false); + ovs_list_insert(routes, &pr->list_node); + return pr; + } +@@ -7739,18 +7743,95 @@ find_static_route_outport(struct ovn_datapath *od, struct hmap *ports, + return true; + } + ++static void ++add_ecmp_symmetric_reply_flows(struct hmap *lflows, ++ struct ovn_datapath *od, ++ const char *port_ip, ++ struct ovn_port *out_port, ++ const struct parsed_route *route, ++ struct ds *route_match) ++{ ++ const struct nbrec_logical_router_static_route *st_route = route->route; ++ struct ds match = DS_EMPTY_INITIALIZER; ++ struct ds actions = DS_EMPTY_INITIALIZER; ++ struct ds ecmp_reply = DS_EMPTY_INITIALIZER; ++ char *cidr = normalize_v46_prefix(&route->prefix, route->plen); ++ ++ /* If symmetric ECMP replies are enabled, then packets that arrive over ++ * an ECMP route need to go through conntrack. ++ */ ++ ds_put_format(&match, "inport == %s && ip%s.%s == %s", ++ out_port->json_key, ++ route->prefix.family == AF_INET ? "4" : "6", ++ route->is_src_route ? "dst" : "src", ++ cidr); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, ++ ds_cstr(&match), "ct_next;", ++ &st_route->header_); ++ ++ /* And packets that go out over an ECMP route need conntrack */ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, ++ ds_cstr(route_match), "ct_next;", ++ &st_route->header_); ++ ++ /* Save src eth and inport in ct_label for packets that arrive over ++ * an ECMP route. ++ * ++ * NOTE: we purposely are not clearing match before this ++ * ds_put_cstr() call. 
The previous contents are needed. ++ */ ++ ds_put_cstr(&match, " && (ct.new && !ct.est)"); ++ ++ ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src;" ++ " ct_label.ecmp_reply_port = %" PRId64 ";}; next;", ++ out_port->sb->tunnel_key); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, ++ ds_cstr(&match), ds_cstr(&actions), ++ &st_route->header_); ++ ++ /* Bypass ECMP selection if we already have ct_label information ++ * for where to route the packet. ++ */ ++ ds_put_format(&ecmp_reply, "ct.rpl && ct_label.ecmp_reply_port == %" ++ PRId64, out_port->sb->tunnel_key); ++ ds_clear(&match); ++ ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply), ++ ds_cstr(route_match)); ++ ds_clear(&actions); ++ ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; " ++ "eth.src = %s; %sreg1 = %s; outport = %s; next;", ++ out_port->lrp_networks.ea_s, ++ route->prefix.family == AF_INET ? "" : "xx", ++ port_ip, out_port->json_key); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 100, ++ ds_cstr(&match), ds_cstr(&actions), ++ &st_route->header_); ++ ++ /* Egress reply traffic for symmetric ECMP routes skips router policies. 
*/ ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY, 65535, ++ ds_cstr(&ecmp_reply), "next;", ++ &st_route->header_); ++ ++ ds_clear(&actions); ++ ds_put_cstr(&actions, "eth.dst = ct_label.ecmp_reply_eth; next;"); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_RESOLVE, ++ 200, ds_cstr(&ecmp_reply), ++ ds_cstr(&actions), &st_route->header_); ++} ++ + static void + build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, + struct hmap *ports, struct ecmp_groups_node *eg) + + { + bool is_ipv4 = (eg->prefix.family == AF_INET); +- struct ds match = DS_EMPTY_INITIALIZER; + uint16_t priority; ++ struct ecmp_route_list_node *er; ++ struct ds route_match = DS_EMPTY_INITIALIZER; + + char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen); + build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4, +- &match, &priority); ++ &route_match, &priority); + free(prefix_s); + + struct ds actions = DS_EMPTY_INITIALIZER; +@@ -7758,7 +7839,6 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, + "; %s = select(", REG_ECMP_GROUP_ID, eg->id, + REG_ECMP_MEMBER_ID); + +- struct ecmp_route_list_node *er; + bool is_first = true; + LIST_FOR_EACH (er, list_node, &eg->route_list) { + if (is_first) { +@@ -7772,11 +7852,12 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, + ds_put_cstr(&actions, ");"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, priority, +- ds_cstr(&match), ds_cstr(&actions)); ++ ds_cstr(&route_match), ds_cstr(&actions)); + + /* Add per member flow */ ++ struct ds match = DS_EMPTY_INITIALIZER; ++ struct sset visited_ports = SSET_INITIALIZER(&visited_ports); + LIST_FOR_EACH (er, list_node, &eg->route_list) { +- + const struct parsed_route *route_ = er->route; + const struct nbrec_logical_router_static_route *route = route_->route; + /* Find the outgoing port. 
*/ +@@ -7786,6 +7867,15 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, + &out_port)) { + continue; + } ++ /* Symmetric ECMP reply is only usable on gateway routers. ++ * It is NOT usable on distributed routers with a gateway port. ++ */ ++ if (smap_get(&od->nbr->options, "chassis") && ++ route_->ecmp_symmetric_reply && sset_add(&visited_ports, ++ out_port->key)) { ++ add_ecmp_symmetric_reply_flows(lflows, od, lrp_addr_s, out_port, ++ route_, &route_match); ++ } + ds_clear(&match); + ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " + REG_ECMP_MEMBER_ID" == %"PRIu16, +@@ -7806,7 +7896,9 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od, + ds_cstr(&match), ds_cstr(&actions), + &route->header_); + } ++ sset_destroy(&visited_ports); + ds_destroy(&match); ++ ds_destroy(&route_match); + ds_destroy(&actions); + } + +@@ -9161,6 +9253,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;"); + + /* Send the IPv6 NS packets to next table. When ovn-controller + * generates IPv6 NS (for the action - nd_ns{}), the injected +diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml +index 246cebc19..b1a462933 100644 +--- a/ovn-architecture.7.xml ++++ b/ovn-architecture.7.xml +@@ -1210,11 +1210,12 @@ +
            + Fields that denote the connection tracking zones for routers. These + values only have local significance and are not meaningful between +- chassis. OVN stores the zone information for DNATting in Open vSwitch ++ chassis. OVN stores the zone information for north to south traffic ++ (for DNATting or ECMP symmetric replies) in Open vSwitch + +- extension register number 11 and zone information for SNATing in +- Open vSwitch extension register number 12. ++ extension register number 11 and zone information for south to north ++ traffic (for SNATing) in Open vSwitch extension register number 12. +
            + +
            logical flow flags
            +diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema +index da9af7157..0c939b715 100644 +--- a/ovn-nb.ovsschema ++++ b/ovn-nb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Northbound", +- "version": "5.24.0", +- "cksum": "1092394564 25961", ++ "version": "5.25.0", ++ "cksum": "1354137211 26116", + "tables": { + "NB_Global": { + "columns": { +@@ -365,6 +365,9 @@ + "min": 0, "max": 1}}, + "nexthop": {"type": "string"}, + "output_port": {"type": {"key": "string", "min": 0, "max": 1}}, ++ "options": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 02161372a..98d36b270 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -2520,6 +2520,22 @@ + + + ++ ++ ++ This column provides general key/value settings. The supported ++ options are described individually below. ++ ++ ++ ++ If true, then new traffic that arrives over this route will have ++ its reply traffic bypass ECMP route selection and will be sent out ++ this route instead. Note that this option overrides any rules set ++ in the table. This option ++ only works on gateway routers (routers that have ++ set). ++ ++ ++ + + + +diff --git a/tests/ovn.at b/tests/ovn.at +index 4c68b77d8..b626bcfcc 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -192,6 +192,8 @@ ct.snat = ct_state[6] + ct.trk = ct_state[5] + ct_label = NXM_NX_CT_LABEL + ct_label.blocked = ct_label[0] ++ct_label.ecmp_reply_eth = ct_label[32..79] ++ct_label.ecmp_reply_port = ct_label[80..95] + ct_mark = NXM_NX_CT_MARK + ct_state = NXM_NX_CT_STATE + ]]) +@@ -16128,7 +16130,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + # Since the sw0-vir is not claimed by any chassis, eth.dst should be set to + # zero if the ip4.dst is the virtual ip in the router pipeline. 
+ AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) + ]) + + ip_to_hex() { +@@ -16179,7 +16181,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + # There should be an arp resolve flow to resolve the virtual_ip with the + # sw0-p1's MAC. + AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) + ]) + + # Forcibly clear virtual_parent. ovn-controller should release the binding +@@ -16220,7 +16222,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + # There should be an arp resolve flow to resolve the virtual_ip with the + # sw0-p2's MAC. + AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:05; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:05; next;) + ]) + + # send the garp from sw0-p2 (in hv2). hv2 should claim sw0-vir +@@ -16243,7 +16245,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + # There should be an arp resolve flow to resolve the virtual_ip with the + # sw0-p3's MAC. 
+ AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) + ]) + + # Now send arp reply from sw0-p1. hv1 should claim sw0-vir +@@ -16264,7 +16266,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + > lflows.txt + + AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) + ]) + + # Delete hv1-vif1 port. hv1 should release sw0-vir +@@ -16282,7 +16284,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + > lflows.txt + + AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) + ]) + + # Now send arp reply from sw0-p2. 
hv2 should claim sw0-vir +@@ -16303,7 +16305,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ + > lflows.txt + + AT_CHECK([cat lflows.txt], [0], [dnl +- table=12(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) ++ table=13(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) + ]) + + # Delete sw0-p2 logical port +@@ -20483,22 +20485,22 @@ ovn-nbctl set logical_router_policy $pol5 options:pkt_mark=5 + ovn-nbctl --wait=hv sync + + OVS_WAIT_UNTIL([ +- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=20 | \ + grep "load:0x64->NXM_NX_PKT_MARK" -c) + ]) + + OVS_WAIT_UNTIL([ +- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=20 | \ + grep "load:0x3->NXM_NX_PKT_MARK" -c) + ]) + + OVS_WAIT_UNTIL([ +- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=20 | \ + grep "load:0x4->NXM_NX_PKT_MARK" -c) + ]) + + OVS_WAIT_UNTIL([ +- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=20 | \ + grep "load:0x5->NXM_NX_PKT_MARK" -c) + ]) + +@@ -20589,12 +20591,12 @@ send_ipv4_pkt hv1 hv1-vif1 505400000003 00000000ff01 \ + $(ip_to_hex 10 0 0 3) $(ip_to_hex 172 168 0 120) + + OVS_WAIT_UNTIL([ +- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=20 | \ + grep "load:0x2->NXM_NX_PKT_MARK" -c) + ]) + + AT_CHECK([ +- test 0 -eq $(as hv1 ovs-ofctl dump-flows br-int table=19 | \ ++ test 0 -eq $(as hv1 ovs-ofctl dump-flows br-int table=20 | \ + grep "load:0x64->NXM_NX_PKT_MARK" -c) + ]) + +@@ -20927,7 +20929,6 @@ AT_CHECK([test "$hv2_offlows" = "$hv2_offlows_mon"]) + 
OVN_CLEANUP([hv1], [hv2]) + AT_CLEANUP + +- + AT_SETUP([ovn -- controller I-P handling when lrp added last]) + + ovn_start +@@ -21111,6 +21112,129 @@ AT_CHECK([test ! -z $p1_zoneid]) + OVN_CLEANUP([hv1]) + AT_CLEANUP + ++AT_SETUP([ovn -- Symmetric ECMP reply flows]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.1 ++ ++sim_add hv2 ++as hv2 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.2 ++ ++# Logical network ++# ++# ls1 \ ++# \ ++# DR -- join -- GW -- ext ++# / ++# ls2 / ++# ++# ls1 and ls2 are internal switches connected to distributed router ++# DR. DR is then connected via a join switch to gateway router GW. ++# GW is then connected to external switch ext. In real life, this ++# would likely have a localnet port, but for the purposes of this test ++# it is unnecessary. ++ ++ovn-nbctl create Logical_Router name=DR ++gw_uuid=$(ovn-nbctl create Logical_Router name=GW) ++ ++ovn-nbctl ls-add ls1 ++ovn-nbctl ls-add ls2 ++ovn-nbctl ls-add join ++ovn-nbctl ls-add ext ++ ++# Connect ls1 to DR ++ovn-nbctl lrp-add DR dr-ls1 00:00:01:01:02:03 10.0.0.1/24 ++ovn-nbctl lsp-add ls1 ls1-dr -- set Logical_Switch_Port ls1-dr \ ++ type=router options:router-port=dr-ls1 addresses='"00:00:01:01:02:03"' ++ ++# Connect ls2 to DR ++ovn-nbctl lrp-add DR dr-ls2 00:00:01:01:02:04 10.0.0.2/24 ++ovn-nbctl lsp-add ls2 ls2-dr -- set Logical_Switch_Port ls2-dr \ ++ type=router options:router-port=dr-ls2 addresses='"00:00:01:01:02:04"' ++ ++# Connect join to DR ++ovn-nbctl lrp-add DR dr-join 00:00:02:01:02:03 20.0.0.1/24 ++ovn-nbctl lsp-add join join-dr -- set Logical_Switch_Port join-dr \ ++ type=router options:router-port=dr-join addresses='"00:00:02:01:02:03"' ++ ++# Connect join to GW ++ovn-nbctl lrp-add GW gw-join 00:00:02:01:02:04 20.0.0.2/24 ++ovn-nbctl lsp-add join join-gw -- set Logical_Switch_Port join-gw \ ++ type=router options:router-port=gw-join addresses='"00:00:02:01:02:04"' ++ ++# Connect ext to GW 
++ovn-nbctl lrp-add GW gw-ext 00:00:03:01:02:03 172.16.0.1/16 ++ovn-nbctl lsp-add ext ext-gw -- set Logical_Switch_Port ext-gw \ ++ type=router options:router-port=gw-ext addresses='"00:00:03:01:02:03"' ++ ++ovn-nbctl lr-route-add GW 10.0.0.0/24 20.0.0.1 ++ovn-nbctl --policy="src-ip" lr-route-add DR 10.0.0.0/24 20.0.0.2 ++ ++# Now add some ECMP routes to the GW router. ++ovn-nbctl --ecmp-symmetric-reply --policy="src-ip" lr-route-add GW 10.0.0.0/24 172.16.0.2 ++ovn-nbctl --ecmp-symmetric-reply --policy="src-ip" lr-route-add GW 10.0.0.0/24 172.16.0.3 ++ ++ovn-nbctl --wait=hv sync ++ ++# Ensure ECMP symmetric reply flows are not present on any hypervisor. ++AT_CHECK([ ++ test 0 -eq $(as hv1 ovs-ofctl dump-flows br-int table=15 | \ ++ grep "priority=100" | \ ++ grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) ++]) ++AT_CHECK([ ++ test 0 -eq $(as hv1 ovs-ofctl dump-flows br-int table=21 | \ ++ grep "priority=200" | \ ++ grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) ++]) ++ ++AT_CHECK([ ++ test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=15 | \ ++ grep "priority=100" | \ ++ grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) ++]) ++AT_CHECK([ ++ test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=21 | \ ++ grep "priority=200" | \ ++ grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) ++]) ++ ++# Now make GW a gateway router on hv1 ++ovn-nbctl set Logical_Router $gw_uuid options:chassis=hv1 ++ovn-nbctl --wait=hv sync ++ ++# And ensure that ECMP symmetric reply flows are present only on hv1 ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=15 | \ ++ grep "priority=100" | \ ++ grep 
"ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) ++]) ++AT_CHECK([ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=21 | \ ++ grep "priority=200" | \ ++ grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) ++]) ++ ++AT_CHECK([ ++ test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=15 | \ ++ grep "priority=100" | \ ++ grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) ++]) ++AT_CHECK([ ++ test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=21 | \ ++ grep "priority=200" | \ ++ grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) ++]) ++ ++OVN_CLEANUP([hv1], [hv2]) ++AT_CLEANUP ++ + # Test option:dynamic_neigh_routers. No static neighbor flows when enabled, and + # traffic should still work, with the help of dynamic mac_bindings. + AT_SETUP([ovn -- Dynamic neighbor between LRs]) +diff --git a/tests/system-ovn.at b/tests/system-ovn.at +index 94e3964e5..bce097b17 100644 +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -4555,6 +4555,149 @@ NS_CHECK_EXEC([sw0-p1-f], [ping -q -c 3 -i 0.3 -w 2 10.0.0.5 | FORMAT_PING], \ + 3 packets transmitted, 3 received, 0% packet loss, time 0ms + ]) + ++OVS_APP_EXIT_AND_WAIT([ovn-controller]) ++ ++as ovn-sb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as ovn-nb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as northd ++OVS_APP_EXIT_AND_WAIT([ovn-northd]) ++ ++as ++OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d ++/connection dropped.*/d"]) ++ ++AT_CLEANUP ++ ++AT_SETUP([ovn -- ECMP symmetric reply]) ++AT_KEYWORDS([ecmp]) ++ ++CHECK_CONNTRACK() ++ovn_start ++ ++OVS_TRAFFIC_VSWITCHD_START() ++ADD_BR([br-int]) ++ ++# Set external-ids in br-int needed for ovn-controller ++ovs-vsctl \ ++ -- set Open_vSwitch . 
external-ids:system-id=hv1 \ ++ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ ++ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ ++ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ ++ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true ++ ++# Start ovn-controller ++start_daemon ovn-controller ++ ++# Logical network: ++# Alice is connected to gateway router R1. R1 is connected to two "external" ++# routers, R2 and R3 via an "ext" switch. ++# Bob is connected to both R2 and R3. R1 contains two ECMP routes, one through R2 ++# and one through R3, to Bob. ++# ++# alice -- R1 -- ext ---- R2 ++# | \ ++# | bob ++# | / ++# + ----- R3 ++# ++# For this test, Bob sends request traffic through R2 to Alice. We want to ensure that ++# all response traffic from Alice is routed through R2 as well. ++ ++ovn-nbctl create Logical_Router name=R1 options:chassis=hv1 ++ovn-nbctl create Logical_Router name=R2 ++ovn-nbctl create Logical_Router name=R3 ++ ++ovn-nbctl ls-add alice ++ovn-nbctl ls-add bob ++ovn-nbctl ls-add ext ++ ++# connect alice to R1 ++ovn-nbctl lrp-add R1 alice 00:00:01:01:02:03 10.0.0.1/24 ++ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \ ++ type=router options:router-port=alice addresses='"00:00:01:01:02:03"' ++ ++# connect bob to R2 ++ovn-nbctl lrp-add R2 R2_bob 00:00:02:01:02:03 172.16.0.2/16 ++ovn-nbctl lsp-add bob rp2-bob -- set Logical_Switch_Port rp2-bob \ ++ type=router options:router-port=R2_bob addresses='"00:00:02:01:02:03"' ++ ++# connect bob to R3 ++ovn-nbctl lrp-add R3 R3_bob 00:00:02:01:02:04 172.16.0.3/16 ++ovn-nbctl lsp-add bob rp3-bob -- set Logical_Switch_Port rp3-bob \ ++ type=router options:router-port=R3_bob addresses='"00:00:02:01:02:04"' ++ ++# Connect R1 to ext ++ovn-nbctl lrp-add R1 R1_ext 00:00:04:01:02:03 20.0.0.1/24 ++ovn-nbctl lsp-add ext r1-ext -- set Logical_Switch_Port r1-ext \ ++ type=router options:router-port=R1_ext 
addresses='"00:00:04:01:02:03"' ++ ++# Connect R2 to ext ++ovn-nbctl lrp-add R2 R2_ext 00:00:04:01:02:04 20.0.0.2/24 ++ovn-nbctl lsp-add ext r2-ext -- set Logical_Switch_Port r2-ext \ ++ type=router options:router-port=R2_ext addresses='"00:00:04:01:02:04"' ++ ++# Connect R3 to ext ++ovn-nbctl lrp-add R3 R3_ext 00:00:04:01:02:05 20.0.0.3/24 ++ovn-nbctl lsp-add ext r3-ext -- set Logical_Switch_Port r3-ext \ ++ type=router options:router-port=R3_ext addresses='"00:00:04:01:02:05"' ++ ++# Install ECMP routes for alice. ++ovn-nbctl --ecmp-symmetric-reply --policy="src-ip" lr-route-add R1 10.0.0.0/24 20.0.0.2 ++ovn-nbctl --ecmp-symmetric-reply --policy="src-ip" lr-route-add R1 10.0.0.0/24 20.0.0.3 ++ ++# Static Routes ++ovn-nbctl lr-route-add R2 10.0.0.0/24 20.0.0.1 ++ovn-nbctl lr-route-add R3 10.0.0.0/24 20.0.0.1 ++ ++# Logical port 'alice1' in switch 'alice'. ++ADD_NAMESPACES(alice1) ++ADD_VETH(alice1, alice1, br-int, "10.0.0.2/24", "f0:00:00:01:02:04", \ ++ "10.0.0.1") ++ovn-nbctl lsp-add alice alice1 \ ++-- lsp-set-addresses alice1 "f0:00:00:01:02:04 10.0.0.2" ++ ++# Logical port 'bob1' in switch 'bob'. ++ADD_NAMESPACES(bob1) ++ADD_VETH(bob1, bob1, br-int, "172.16.0.1/16", "f0:00:00:01:02:06", \ ++ "172.16.0.2") ++ovn-nbctl lsp-add bob bob1 \ ++-- lsp-set-addresses bob1 "f0:00:00:01:02:06 172.16.0.1" ++ ++# Ensure ovn-controller is caught up ++ovn-nbctl --wait=hv sync ++ ++on_exit 'ovs-ofctl dump-flows br-int' ++ ++# 'bob1' should be able to ping 'alice1' directly. ++NS_CHECK_EXEC([bob1], [ping -q -c 20 -i 0.3 -w 15 10.0.0.2 | FORMAT_PING], \ ++[0], [dnl ++20 packets transmitted, 20 received, 0% packet loss, time 0ms ++]) ++ ++# Ensure conntrack entry is present. We should not try to predict ++# the tunnel key for the output port, so we strip it from the labels ++# and just ensure that the known ethernet address is present. 
++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1) | \ ++sed -e 's/zone=[[0-9]]*/zone=/' | ++sed -e 's/labels=0x[[0-9a-f]]*00000401020400000000/labels=0x00000401020400000000/'], [0], [dnl ++icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=,type=0,code=0),zone=,labels=0x00000401020400000000 ++]) ++ ++# Ensure datapaths show conntrack states as expected ++# Like with conntrack entries, we shouldn't try to predict ++# port binding tunnel keys. So omit them from expected labels. ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x.*00000401020400000000/0xffffffffffffffff00000000)' -c], [0], [dnl ++1 ++]) ++AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x.*00000401020400000000/0xffffffffffffffff00000000)' -c], [0], [dnl ++1 ++]) ++ ++ovs-ofctl dump-flows br-int + + OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml +index de86b70e6..18bf90e08 100644 +--- a/utilities/ovn-nbctl.8.xml ++++ b/utilities/ovn-nbctl.8.xml +@@ -658,7 +658,8 @@ + +
            +
            [--may-exist] [--policy=POLICY] +- [--ecmp] lr-route-add router ++ [--ecmp] [--ecmp-symmetric-reply] ++ lr-route-add router + prefix nexthop [port]
            +
            +

            +@@ -680,15 +681,31 @@ + specified, the default is "dst-ip". +

            + ++

            ++ The --ecmp option allows for multiple routes with the ++ same prefix POLICY but different ++ nexthop and port to be added. ++

            ++ ++

            ++ The --ecmp-symmetric-reply option makes it so that ++ traffic that arrives over an ECMP route will have its reply traffic ++ sent out over that same route. Setting ++ --ecmp-symmetric-reply implies --ecmp so ++ it is not necessary to set both. ++

            ++ +

            + It is an error if a route with prefix and +- POLICY already exists, unless --may-exist or +- --ecmp is specified. If --may-exist is +- specified but not --ecmp, the existed route will be +- updated with the new nexthop and port. If --ecmp is ++ POLICY already exists, unless --may-exist, ++ --ecmp, or --ecmp-symmetric-reply is ++ specified. If --may-exist is specified but not ++ --ecmp or --ecmp-symmetric-reply, the ++ existed route will be updated with the new nexthop and port. If ++ --ecmp or --ecmp-symmetric-reply is + specified, a new route will be added, regardless of the existed +- route, which is useful when adding ECMP routes, i.e. routes with same +- POLICY and prefix but different ++ route, which is useful when adding ECMP routes, i.e. routes with ++ same POLICY and prefix but different + nexthop and port. +

            +
            +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 0079ad5a6..e6d8dbe63 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -687,7 +687,8 @@ Logical router port commands:\n\ + ('overlay' or 'bridged')\n\ + \n\ + Route commands:\n\ +- [--policy=POLICY] [--ecmp] lr-route-add ROUTER PREFIX NEXTHOP [PORT]\n\ ++ [--policy=POLICY] [--ecmp] [--ecmp-symmetric-reply] lr-route-add ROUTER \n\ ++ PREFIX NEXTHOP [PORT]\n\ + add a route to ROUTER\n\ + [--policy=POLICY] lr-route-del ROUTER [PREFIX [NEXTHOP [PORT]]]\n\ + remove routes from ROUTER\n\ +@@ -3855,7 +3856,10 @@ nbctl_lr_route_add(struct ctl_context *ctx) + } + + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; +- bool ecmp = shash_find(&ctx->options, "--ecmp") != NULL; ++ bool ecmp_symmetric_reply = shash_find(&ctx->options, ++ "--ecmp-symmetric-reply") != NULL; ++ bool ecmp = shash_find(&ctx->options, "--ecmp") != NULL || ++ ecmp_symmetric_reply; + if (!ecmp) { + for (int i = 0; i < lr->n_static_routes; i++) { + const struct nbrec_logical_router_static_route *route +@@ -3920,6 +3924,13 @@ nbctl_lr_route_add(struct ctl_context *ctx) + nbrec_logical_router_static_route_set_policy(route, policy); + } + ++ if (ecmp_symmetric_reply) { ++ const struct smap options = SMAP_CONST1(&options, ++ "ecmp_symmetric_reply", ++ "true"); ++ nbrec_logical_router_static_route_set_options(route, &options); ++ } ++ + nbrec_logical_router_verify_static_routes(lr); + struct nbrec_logical_router_static_route **new_routes + = xmalloc(sizeof *new_routes * (lr->n_static_routes + 1)); +@@ -6361,7 +6372,8 @@ static const struct ctl_command_syntax nbctl_commands[] = { + + /* logical router route commands. 
*/ + { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", NULL, +- nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--policy=", RW }, ++ nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply," ++ "--policy=", RW }, + { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", NULL, + nbctl_lr_route_del, NULL, "--if-exists,--policy=", RW }, + { "lr-route-list", 1, 1, "ROUTER", NULL, nbctl_lr_route_list, NULL, +-- +2.26.2 + diff --git a/SOURCES/0014-expr.c-Fix-argument-type-of-expr_write_scope.patch b/SOURCES/0014-expr.c-Fix-argument-type-of-expr_write_scope.patch new file mode 100644 index 0000000..5e9b04a --- /dev/null +++ b/SOURCES/0014-expr.c-Fix-argument-type-of-expr_write_scope.patch @@ -0,0 +1,39 @@ +From 677e51935f9030a779956cf3c6fb0f4d2fb19c10 Mon Sep 17 00:00:00 2001 +From: Han Zhou +Date: Wed, 29 Jul 2020 18:36:38 -0700 +Subject: [PATCH 14/22] expr.c: Fix argument type of expr_write_scope(). + +There is compile error introduced by the commit 2054d01247. +*** +../lib/expr.c:3328:6: error: symbol 'expr_type_check' redeclared with different +type (originally declared at ../include/ovn/expr.h:481) - incompatible argument +4 (different signedness) +Makefile:1971: recipe for target 'lib/expr.lo' failed +*** + +Fixes: 2054d01247 ("Add expression writeability scopes.") +Acked-by: Ankur Sharma +Signed-off-by: Han Zhou +(cherry picked from upstream commit c64ed2a9bb175539ed9493a08e31cc29d10aa8df) + +Change-Id: I1de528787319b21a50bdcebbe60894026b32ab06 +--- + lib/expr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/expr.c b/lib/expr.c +index c07e7dd4d..6fb96757a 100644 +--- a/lib/expr.c ++++ b/lib/expr.c +@@ -3327,7 +3327,7 @@ expr_evaluate(const struct expr *e, const struct flow *uflow, + * must free(). 
*/ + char * OVS_WARN_UNUSED_RESULT + expr_type_check(const struct expr_field *f, int n_bits, bool rw, +- uint32_t write_scope) ++ enum expr_write_scope write_scope) + { + if (n_bits != f->n_bits) { + if (n_bits && f->n_bits) { +-- +2.26.2 + diff --git a/SOURCES/0015-Allow-bare-ct_commits-when-no-nested-actions-are-req.patch b/SOURCES/0015-Allow-bare-ct_commits-when-no-nested-actions-are-req.patch new file mode 100644 index 0000000..e0e34c9 --- /dev/null +++ b/SOURCES/0015-Allow-bare-ct_commits-when-no-nested-actions-are-req.patch @@ -0,0 +1,75 @@ +From bc0e773fa3620fa3c4fef817d3b2256542be6b11 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Thu, 6 Aug 2020 10:48:12 -0400 +Subject: [PATCH 15/22] Allow bare ct_commits when no nested actions are + required. + +In the fixes commit below, ct_commit was changed to use nested actions. +This requires that curly braces be present for all ct_commits. When +adjusting ovn-northd, some ct_commits were not updated to have them. +This commit changes the behavior of the ct_commit action not to require +curly braces if there are no nested actions required. 
+ +Fixes: 6cfb44a76c61("Used nested actions in ct_commit") +Signed-off-by: Mark Michelson +--- + lib/actions.c | 20 ++++++++++++++++---- + tests/ovn.at | 5 ++++- + 2 files changed, 20 insertions(+), 5 deletions(-) + +diff --git a/lib/actions.c b/lib/actions.c +index 79ac79a95..245486b0a 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -630,15 +630,27 @@ ovnact_ct_next_free(struct ovnact_ct_next *a OVS_UNUSED) + static void + parse_CT_COMMIT(struct action_context *ctx) + { +- +- parse_nested_action(ctx, OVNACT_CT_COMMIT, "ip", +- WR_CT_COMMIT); ++ if (ctx->lexer->token.type == LEX_T_LCURLY) { ++ parse_nested_action(ctx, OVNACT_CT_COMMIT, "ip", ++ WR_CT_COMMIT); ++ } else { ++ /* Add an empty nested action to allow for "ct_commit;" syntax */ ++ add_prerequisite(ctx, "ip"); ++ struct ovnact_nest *on = ovnact_put(ctx->ovnacts, OVNACT_CT_COMMIT, ++ OVNACT_ALIGN(sizeof *on)); ++ on->nested_len = 0; ++ on->nested = NULL; ++ } + } + + static void + format_CT_COMMIT(const struct ovnact_nest *on, struct ds *s) + { +- format_nested_action(on, "ct_commit", s); ++ if (on->nested_len) { ++ format_nested_action(on, "ct_commit", s); ++ } else { ++ ds_put_cstr(s, "ct_commit;"); ++ } + } + + static void +diff --git a/tests/ovn.at b/tests/ovn.at +index b626bcfcc..2651b3eac 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -1047,8 +1047,11 @@ ct_next; + has prereqs ip + + # ct_commit ++ct_commit; ++ encodes as ct(commit,zone=NXM_NX_REG13[0..15]) ++ has prereqs ip + ct_commit { }; +- formats as ct_commit { drop; }; ++ formats as ct_commit; + encodes as ct(commit,zone=NXM_NX_REG13[0..15]) + has prereqs ip + ct_commit { ct_mark=1; }; +-- +2.26.2 + diff --git a/SOURCES/0016-Allow-force_snat-options-to-work-for-dual-stack-rout.patch b/SOURCES/0016-Allow-force_snat-options-to-work-for-dual-stack-rout.patch new file mode 100644 index 0000000..f9cf520 --- /dev/null +++ b/SOURCES/0016-Allow-force_snat-options-to-work-for-dual-stack-rout.patch @@ -0,0 +1,887 @@ +From 
3aeb8b73e0947010945aea8bf8fa6df1b7a558a7 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Thu, 6 Aug 2020 15:04:15 -0400 +Subject: [PATCH 16/22] Allow force_snat options to work for dual-stack + routers. + +The lb_force_snat and dnat_force_snat options could accept only a single +IP address. For routers that only route traffic of a single IP address +family, this is fine. However, if a router routes both IPv4 and IPv6 +traffic, then this limitation is a problem. + +This patch addresses this problem by allowing for these options to +specify both an IPv4 and IPv6 address. + +Signed-off-by: Mark Michelson +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit 474821c55608cbad5bdb8deee468827ab489c02b) + +Change-Id: I42266af72622d1f15ec94d68f106954cc49979bd +--- + northd/ovn-northd.c | 187 ++++++++------- + ovn-nb.xml | 24 +- + tests/system-ovn.at | 541 ++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 657 insertions(+), 95 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index cb8e25bdf..1f5433d9d 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -8013,44 +8013,37 @@ op_put_v6_networks(struct ds *ds, const struct ovn_port *op) + ds_put_cstr(ds, "}"); + } + +-static const char * ++static bool + get_force_snat_ip(struct ovn_datapath *od, const char *key_type, +- struct v46_ip *ip) ++ struct lport_addresses *laddrs) + { + char *key = xasprintf("%s_force_snat_ip", key_type); +- const char *ip_address = smap_get(&od->nbr->options, key); ++ const char *addresses = smap_get(&od->nbr->options, key); + free(key); + +- if (ip_address) { +- ovs_be32 mask; +- ip->family = AF_INET; +- char *error = ip_parse_masked(ip_address, &ip->ipv4, &mask); +- if (error || mask != OVS_BE32_MAX) { +- free(error); +- struct in6_addr mask_v6, v6_exact = IN6ADDR_EXACT_INIT; +- ip->family = AF_INET6; +- error = ipv6_parse_masked(ip_address, &ip->ipv6, &mask_v6); +- if (error || memcmp(&mask_v6, 
&v6_exact, sizeof(mask_v6))) { +- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); +- VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"", +- ip_address, UUID_ARGS(&od->key)); +- memset(ip, 0, sizeof *ip); +- ip->family = AF_UNSPEC; +- return NULL; +- } +- } +- return ip_address; ++ if (!addresses) { ++ return false; + } + +- memset(ip, 0, sizeof *ip); +- ip->family = AF_UNSPEC; +- return NULL; ++ if (!extract_ip_addresses(addresses, laddrs) || ++ laddrs->n_ipv4_addrs > 1 || ++ laddrs->n_ipv6_addrs > 1 || ++ (laddrs->n_ipv4_addrs && laddrs->ipv4_addrs[0].plen != 32) || ++ (laddrs->n_ipv6_addrs && laddrs->ipv6_addrs[0].plen != 128)) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"", ++ addresses, UUID_ARGS(&od->key)); ++ destroy_lport_addresses(laddrs); ++ return false; ++ } ++ ++ return true; + } + + static void + add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, + struct ds *match, struct ds *actions, int priority, +- const char *lb_force_snat_ip, struct lb_vip *lb_vip, ++ bool lb_force_snat_ip, struct lb_vip *lb_vip, + const char *proto, struct nbrec_load_balancer *lb, + struct shash *meter_groups, struct sset *nat_entries) + { +@@ -8371,6 +8364,32 @@ build_lrouter_nd_flow(struct ovn_datapath *od, struct ovn_port *op, + ds_destroy(&actions); + } + ++static void ++build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, ++ const char *ip_version, const char *ip_addr, ++ const char *context) ++{ ++ struct ds match = DS_EMPTY_INITIALIZER; ++ struct ds actions = DS_EMPTY_INITIALIZER; ++ ds_put_format(&match, "ip%s && ip%s.dst == %s", ++ ip_version, ip_version, ip_addr); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110, ++ ds_cstr(&match), "ct_snat;"); ++ ++ /* Higher priority rules to force SNAT with the IP addresses ++ * configured in the Gateway router. 
This only takes effect ++ * when the packet has already been DNATed or load balanced once. */ ++ ds_clear(&match); ++ ds_put_format(&match, "flags.force_snat_for_%s == 1 && ip%s", ++ context, ip_version); ++ ds_put_format(&actions, "ct_snat(%s);", ip_addr); ++ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, ++ ds_cstr(&match), ds_cstr(&actions)); ++ ++ ds_destroy(&match); ++ ds_destroy(&actions); ++} ++ + static void + build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct hmap *lflows, struct shash *meter_groups, +@@ -8892,24 +8911,37 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + } + } + +- /* A gateway router can have 2 SNAT IP addresses to force DNATed and ++ /* A gateway router can have 4 SNAT IP addresses to force DNATed and + * LBed traffic respectively to be SNATed. In addition, there can be + * a number of SNAT rules in the NAT table. */ + struct v46_ip *snat_ips = xmalloc(sizeof *snat_ips +- * (op->od->nbr->n_nat + 2)); ++ * (op->od->nbr->n_nat + 4)); + size_t n_snat_ips = 0; ++ struct lport_addresses snat_addrs; + +- struct v46_ip snat_ip; +- const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat", +- &snat_ip); +- if (dnat_force_snat_ip) { +- snat_ips[n_snat_ips++] = snat_ip; ++ if (get_force_snat_ip(op->od, "dnat", &snat_addrs)) { ++ if (snat_addrs.n_ipv4_addrs) { ++ snat_ips[n_snat_ips].family = AF_INET; ++ snat_ips[n_snat_ips++].ipv4 = snat_addrs.ipv4_addrs[0].addr; ++ } ++ if (snat_addrs.n_ipv6_addrs) { ++ snat_ips[n_snat_ips].family = AF_INET6; ++ snat_ips[n_snat_ips++].ipv6 = snat_addrs.ipv6_addrs[0].addr; ++ } ++ destroy_lport_addresses(&snat_addrs); + } + +- const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb", +- &snat_ip); +- if (lb_force_snat_ip) { +- snat_ips[n_snat_ips++] = snat_ip; ++ memset(&snat_addrs, 0, sizeof(snat_addrs)); ++ if (get_force_snat_ip(op->od, "lb", &snat_addrs)) { ++ if (snat_addrs.n_ipv4_addrs) { ++ snat_ips[n_snat_ips].family = AF_INET; ++ 
snat_ips[n_snat_ips++].ipv4 = snat_addrs.ipv4_addrs[0].addr; ++ } ++ if (snat_addrs.n_ipv6_addrs) { ++ snat_ips[n_snat_ips].family = AF_INET6; ++ snat_ips[n_snat_ips++].ipv6 = snat_addrs.ipv6_addrs[0].addr; ++ } ++ destroy_lport_addresses(&snat_addrs); + } + + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { +@@ -9269,11 +9301,12 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + + struct sset nat_entries = SSET_INITIALIZER(&nat_entries); + +- struct v46_ip snat_ip, lb_snat_ip; +- const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat", +- &snat_ip); +- const char *lb_force_snat_ip = get_force_snat_ip(od, "lb", +- &lb_snat_ip); ++ struct lport_addresses dnat_force_snat_addrs; ++ struct lport_addresses lb_force_snat_addrs; ++ bool dnat_force_snat_ip = get_force_snat_ip(od, "dnat", ++ &dnat_force_snat_addrs); ++ bool lb_force_snat_ip = get_force_snat_ip(od, "lb", ++ &lb_force_snat_addrs); + + for (int i = 0; i < od->nbr->n_nat; i++) { + const struct nbrec_nat *nat; +@@ -9739,49 +9772,28 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + } + + /* Handle force SNAT options set in the gateway router. */ +- if (dnat_force_snat_ip && !od->l3dgw_port) { +- /* If a packet with destination IP address as that of the +- * gateway router (as set in options:dnat_force_snat_ip) is seen, +- * UNSNAT it. */ +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.dst == %s", +- snat_ip.family == AF_INET ? "4" : "6", +- dnat_force_snat_ip); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110, +- ds_cstr(&match), "ct_snat;"); +- +- /* Higher priority rules to force SNAT with the IP addresses +- * configured in the Gateway router. This only takes effect +- * when the packet has already been DNATed once. 
*/ +- ds_clear(&match); +- ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip"); +- ds_clear(&actions); +- ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip); +- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, +- ds_cstr(&match), ds_cstr(&actions)); +- } +- if (lb_force_snat_ip && !od->l3dgw_port) { +- /* If a packet with destination IP address as that of the +- * gateway router (as set in options:lb_force_snat_ip) is seen, +- * UNSNAT it. */ +- ds_clear(&match); +- ds_put_format(&match, "ip && ip%s.dst == %s", +- lb_snat_ip.family == AF_INET ? "4" : "6", +- lb_force_snat_ip); +- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, +- ds_cstr(&match), "ct_snat;"); +- +- /* Load balanced traffic will have flags.force_snat_for_lb set. +- * Force SNAT it. */ +- ds_clear(&match); +- ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip"); +- ds_clear(&actions); +- ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip); +- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, +- ds_cstr(&match), ds_cstr(&actions)); +- } +- + if (!od->l3dgw_port) { ++ if (dnat_force_snat_ip) { ++ if (dnat_force_snat_addrs.n_ipv4_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "4", ++ dnat_force_snat_addrs.ipv4_addrs[0].addr_s, "dnat"); ++ } ++ if (dnat_force_snat_addrs.n_ipv6_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "6", ++ dnat_force_snat_addrs.ipv6_addrs[0].addr_s, "dnat"); ++ } ++ } ++ if (lb_force_snat_ip) { ++ if (lb_force_snat_addrs.n_ipv4_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "4", ++ lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb"); ++ } ++ if (lb_force_snat_addrs.n_ipv6_addrs) { ++ build_lrouter_force_snat_flows(lflows, od, "6", ++ lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb"); ++ } ++ } ++ + /* For gateway router, re-circulate every packet through + * the DNAT zone. This helps with the following. 
+ * +@@ -9795,6 +9807,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + "ip", "flags.loopback = 1; ct_dnat;"); + } + ++ if (dnat_force_snat_ip) { ++ destroy_lport_addresses(&dnat_force_snat_addrs); ++ } ++ if (lb_force_snat_ip) { ++ destroy_lport_addresses(&lb_force_snat_addrs); ++ } ++ + /* Load balancing and packet defrag are only valid on + * Gateway routers or router with gateway port. */ + if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { +diff --git a/ovn-nb.xml b/ovn-nb.xml +index 98d36b270..5f4291559 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -1816,27 +1816,29 @@ + + +

            +- If set, indicates the IP address to use to force SNAT a packet +- that has already been DNATed in the gateway router. When multiple +- gateway routers are configured, a packet can potentially enter any +- of the gateway router, get DNATted and eventually reach the logical +- switch port. For the return traffic to go back to the same gateway +- router (for unDNATing), the packet needs a SNAT in the first place. +- This can be achieved by setting the above option with a gateway +- specific IP address. ++ If set, indicates a set of IP addresses to use to force SNAT a ++ packet that has already been DNATed in the gateway router. When ++ multiple gateway routers are configured, a packet can potentially ++ enter any of the gateway routers, get DNATted and eventually reach the ++ logical switch port. For the return traffic to go back to the same ++ gateway router (for unDNATing), the packet needs a SNAT in the first ++ place. This can be achieved by setting the above option with a ++ gateway specific set of IP addresses. This option may have exactly ++ one IPv4 and/or one IPv6 address on it, separated by a space. +

            +
            + +

            +- If set, indicates the IP address to use to force SNAT a packet ++ If set, indicates a set of IP addresses to use to force SNAT a packet + that has already been load-balanced in the gateway router. When + multiple gateway routers are configured, a packet can potentially + enter any of the gateway routers, get DNATted as part of the load- + balancing and eventually reach the logical switch port. + For the return traffic to go back to the same gateway router (for + unDNATing), the packet needs a SNAT in the first place. This can be +- achieved by setting the above option with a gateway specific IP +- address. ++ achieved by setting the above option with a gateway specific set of ++ IP addresses. This option may have exactly one IPv4 and/or one IPv6 ++ address on it, separated by a space character. +

            +
            + +diff --git a/tests/system-ovn.at b/tests/system-ovn.at +index bce097b17..0d478b4aa 100644 +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -1026,6 +1026,323 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d + /connection dropped.*/d"]) + AT_CLEANUP + ++AT_SETUP([ovn -- multiple gateway routers, SNAT and DNAT - Dual Stack]) ++AT_KEYWORDS([ovnnat]) ++ ++CHECK_CONNTRACK() ++CHECK_CONNTRACK_NAT() ++ovn_start ++OVS_TRAFFIC_VSWITCHD_START() ++ADD_BR([br-int]) ++ ++# Set external-ids in br-int needed for ovn-controller ++ovs-vsctl \ ++ -- set Open_vSwitch . external-ids:system-id=hv1 \ ++ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ ++ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ ++ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ ++ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true ++ ++# Start ovn-controller ++start_daemon ovn-controller ++ ++# Logical network: ++# Three LRs - R1, R2 and R3 that are connected to each other via LS "join" ++# in 20.0.0.0/24 and fd20::/64 networks. R1 has switches foo (192.168.1.0/24 ++# and fd11::/64) and bar (192.168.2.0/24 and fd12::/64) connected to it. R2 ++# has alice (172.16.1.0/24 and fd30::/64) connected to it. R3 has bob ++# (172.16.1.0/24 andfd30::/64) connected to it. Note how both alice and bob ++# have the same subnets behind them. We are trying to simulate external network ++# via those 2 switches. In real world the switch ports of these switches will ++# have addresses set as "unknown" to make them learning switches. Or those ++# switches will be "localnet" ones. 
++# ++# foo -- R1 -- join - R2 -- alice ++# | | ++# bar ---- - R3 --- bob ++ ++ovn-nbctl create Logical_Router name=R1 ++ovn-nbctl create Logical_Router name=R2 options:chassis=hv1 ++ovn-nbctl create Logical_Router name=R3 options:chassis=hv1 ++ ++ovn-nbctl ls-add foo ++ovn-nbctl ls-add bar ++ovn-nbctl ls-add alice ++ovn-nbctl ls-add bob ++ovn-nbctl ls-add join ++ ++# Connect foo to R1 ++ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 fd11::1/64 ++ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ ++ type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" ++ ++# Connect bar to R1 ++ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 fd12::1/64 ++ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ ++ type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" ++ ++# Connect alice to R2 ++ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 172.16.1.1/24 fd30::1/64 ++ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \ ++ type=router options:router-port=alice addresses=\"00:00:02:01:02:03\" ++ ++# Connect bob to R3 ++ovn-nbctl lrp-add R3 bob 00:00:03:01:02:03 172.16.1.2/24 fd30::2/64 ++ovn-nbctl lsp-add bob rp-bob -- set Logical_Switch_Port rp-bob \ ++ type=router options:router-port=bob addresses=\"00:00:03:01:02:03\" ++ ++# Connect R1 to join ++ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 20.0.0.1/24 fd20::1/64 ++ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \ ++ type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"' ++ ++# Connect R2 to join ++ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 20.0.0.2/24 fd20::2/64 ++ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \ ++ type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"' ++ ++# Connect R3 to join ++ovn-nbctl lrp-add R3 R3_join 00:00:04:01:02:05 20.0.0.3/24 fd20::3/64 ++ovn-nbctl lsp-add join r3-join -- set Logical_Switch_Port r3-join \ ++ type=router 
options:router-port=R3_join addresses='"00:00:04:01:02:05"' ++ ++# Install static routes with source ip address as the policy for routing. ++# We want traffic from 'foo' to go via R2 and traffic of 'bar' to go via R3. ++ovn-nbctl --policy="src-ip" lr-route-add R1 fd11::/64 fd20::2 ++ovn-nbctl --policy="src-ip" lr-route-add R1 fd12::/64 fd20::3 ++ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.1.0/24 20.0.0.2 ++ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.2.0/24 20.0.0.3 ++ ++# Static routes. ++ovn-nbctl lr-route-add R2 fd11::/64 fd20::1 ++ovn-nbctl lr-route-add R2 fd12::/64 fd20::1 ++ovn-nbctl lr-route-add R2 192.168.0.0/16 20.0.0.1 ++ovn-nbctl lr-route-add R3 fd11::/64 fd20::1 ++ovn-nbctl lr-route-add R3 fd12::/64 fd20::1 ++ovn-nbctl lr-route-add R3 192.168.0.0/16 20.0.0.1 ++ ++# For gateway routers R2 and R3, set a force SNAT rule. ++ovn-nbctl set logical_router R2 options:dnat_force_snat_ip="20.0.0.2 fd20::2" ++ovn-nbctl set logical_router R3 options:dnat_force_snat_ip="20.0.0.3 fd20::3" ++ ++# Logical port 'foo1' in switch 'foo'. ++ADD_NAMESPACES(foo1) ++ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ ++ "192.168.1.1") ++ovn-nbctl lsp-add foo foo1 \ ++-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" ++ ++ADD_NAMESPACES(foo16) ++ADD_VETH(foo16, foo16, br-int, "fd11::2/64", "f0:00:00:02:02:03", \ ++ "fd11::1") ++OVS_WAIT_UNTIL([test "$(ip netns exec foo16 ip a | grep fd11::2 | grep tentative)" = ""]) ++ovn-nbctl lsp-add foo foo16 \ ++-- lsp-set-addresses foo16 "f0:00:00:02:02:03 fd11::2" ++ ++# Logical port 'alice1' in switch 'alice'. 
++ADD_NAMESPACES(alice1) ++ADD_VETH(alice1, alice1, br-int, "172.16.1.3/24", "f0:00:00:01:02:04", \ ++ "172.16.1.1") ++ovn-nbctl lsp-add alice alice1 \ ++-- lsp-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.3" ++ ++ADD_NAMESPACES(alice16) ++ADD_VETH(alice16, alice16, br-int, "fd30::3/64", "f0:00:00:02:02:04", \ ++ "fd30::1") ++OVS_WAIT_UNTIL([test "$(ip netns exec alice16 ip a | grep fd30::3 | grep tentative)" = ""]) ++ovn-nbctl lsp-add alice alice16 \ ++-- lsp-set-addresses alice16 "f0:00:00:02:02:04 fd30::3" ++ ++# Logical port 'bar1' in switch 'bar'. ++ADD_NAMESPACES(bar1) ++ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:05", \ ++"192.168.2.1") ++ovn-nbctl lsp-add bar bar1 \ ++-- lsp-set-addresses bar1 "f0:00:00:01:02:05 192.168.2.2" ++ ++ADD_NAMESPACES(bar16) ++ADD_VETH(bar16, bar16, br-int, "fd12::2/64", "f0:00:00:02:02:05", \ ++ "fd12::1") ++OVS_WAIT_UNTIL([test "$(ip netns exec bar16 ip a | grep fd12::2 | grep tentative)" = ""]) ++ovn-nbctl lsp-add bar bar16 \ ++-- lsp-set-addresses bar16 "f0:00:00:02:02:05 fd12::2" ++ ++# Logical port 'bob1' in switch 'bob'. ++ADD_NAMESPACES(bob1) ++ADD_VETH(bob1, bob1, br-int, "172.16.1.4/24", "f0:00:00:01:02:06", \ ++ "172.16.1.2") ++ovn-nbctl lsp-add bob bob1 \ ++-- lsp-set-addresses bob1 "f0:00:00:01:02:06 172.16.1.4" ++ ++ADD_NAMESPACES(bob16) ++ADD_VETH(bob16, bob16, br-int, "fd30::4/64", "f0:00:00:02:02:06", \ ++ "fd30::2") ++OVS_WAIT_UNTIL([test "$(ip netns exec bob16 ip a | grep fd30::4 | grep tentative)" = ""]) ++ovn-nbctl lsp-add bob bob16 \ ++-- lsp-set-addresses bob16 "f0:00:00:02:02:06 fd30::4" ++ ++# Router R2 ++# Add a DNAT rule. 
++ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=192.168.1.2 \ ++ external_ip=30.0.0.2 -- add logical_router R2 nat @nat ++ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip='"fd11::2"' \ ++ external_ip='"fd40::2"' -- add logical_router R2 nat @nat ++ ++# Add a SNAT rule ++ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.1.2 \ ++ external_ip=30.0.0.1 -- add logical_router R2 nat @nat ++ovn-nbctl -- --id=@nat create nat type="snat" logical_ip='"fd11::2"' \ ++ external_ip='"fd40::1"' -- add logical_router R2 nat @nat ++ ++# Router R3 ++# Add a DNAT rule. ++ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=192.168.1.2 \ ++ external_ip=30.0.0.3 -- add logical_router R3 nat @nat ++ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip='"fd11::2"' \ ++ external_ip='"fd40::3"' -- add logical_router R3 nat @nat ++ ++# Add a SNAT rule ++ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.2.2 \ ++ external_ip=30.0.0.4 -- add logical_router R3 nat @nat ++ovn-nbctl -- --id=@nat create nat type="snat" logical_ip='"fd12::2"' \ ++ external_ip='"fd40::4"' -- add logical_router R3 nat @nat ++ ++# wait for ovn-controller to catch up. ++ovn-nbctl --wait=hv sync ++OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=fd40::4)']) ++OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=30.0.0.4)']) ++ ++# North-South DNAT: 'alice1' should be able to ping 'foo1' via 30.0.0.2 ++NS_CHECK_EXEC([alice1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.2 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# North-South DNAT: 'alice16' should be able to ping 'foo16' via fd40::2 ++NS_CHECK_EXEC([alice16], [ping -6 -q -c 3 -i 0.3 -w 2 fd40::2 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# Check conntrack entries. 
++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.3) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmp,orig=(src=172.16.1.3,dst=30.0.0.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.3,id=,type=0,code=0),zone= ++]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::3) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmpv6,orig=(src=fd30::3,dst=fd40::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd30::3,id=,type=129,code=0),zone= ++]) ++ ++# But foo1 should receive traffic from 20.0.0.2 ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.2) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmp,orig=(src=172.16.1.3,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=20.0.0.2,id=,type=0,code=0),zone= ++]) ++# But foo16 should receive traffic from fd20::2 ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::2) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmpv6,orig=(src=fd30::3,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd20::2,id=,type=129,code=0),zone= ++]) ++ ++# North-South DNAT: 'bob1' should be able to ping 'foo1' via 30.0.0.3 ++NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.3 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# North-South DNAT: 'bob16' should be able to ping 'foo16' via fd40::3 ++NS_CHECK_EXEC([bob16], [ping -6 -q -c 3 -i 0.3 -w 2 fd40::3 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# Check conntrack entries. 
++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.4) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmp,orig=(src=172.16.1.4,dst=30.0.0.3,id=,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.4,id=,type=0,code=0),zone= ++]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::4) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmpv6,orig=(src=fd30::4,dst=fd40::3,id=,type=128,code=0),reply=(src=fd11::2,dst=fd30::4,id=,type=129,code=0),zone= ++]) ++ ++# But foo1 should receive traffic from 20.0.0.3 ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.3) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmp,orig=(src=172.16.1.4,dst=192.168.1.2,id=,type=8,code=0),reply=(src=192.168.1.2,dst=20.0.0.3,id=,type=0,code=0),zone= ++]) ++ ++# But foo16 should receive traffic from fd20::3 ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::3) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmpv6,orig=(src=fd30::4,dst=fd11::2,id=,type=128,code=0),reply=(src=fd11::2,dst=fd20::3,id=,type=129,code=0),zone= ++]) ++ ++# South-North SNAT: 'bar1' pings 'bob1'. But 'bob1' receives traffic ++# from 30.0.0.4 ++NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++# South-North SNAT: 'bar16' pings 'bob16'. But 'bob16' receives traffic ++# from fd40::4 ++NS_CHECK_EXEC([bar16], [ping -6 -q -c 3 -i 0.3 -w 2 fd30::4 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# We verify that SNAT indeed happened via 'dump-conntrack' command. 
++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.4) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmp,orig=(src=192.168.2.2,dst=172.16.1.4,id=,type=8,code=0),reply=(src=172.16.1.4,dst=30.0.0.4,id=,type=0,code=0),zone= ++]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd40::4) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmpv6,orig=(src=fd12::2,dst=fd30::4,id=,type=128,code=0),reply=(src=fd30::4,dst=fd40::4,id=,type=129,code=0),zone= ++]) ++ ++# South-North SNAT: 'foo1' pings 'alice1'. But 'alice1' receives traffic ++# from 30.0.0.1 ++NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.3 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# South-North SNAT: 'foo16' pings 'alice16'. But 'alice16' receives traffic ++# from fd40::1 ++NS_CHECK_EXEC([foo16], [ping -6 -q -c 3 -i 0.3 -w 2 fd30::3 | FORMAT_PING], \ ++[0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++# We verify that SNAT indeed happened via 'dump-conntrack' command. 
++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmp,orig=(src=192.168.1.2,dst=172.16.1.3,id=,type=8,code=0),reply=(src=172.16.1.3,dst=30.0.0.1,id=,type=0,code=0),zone= ++]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd40::1) | \ ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++icmpv6,orig=(src=fd11::2,dst=fd30::3,id=,type=128,code=0),reply=(src=fd30::3,dst=fd40::1,id=,type=129,code=0),zone= ++]) ++ ++OVS_APP_EXIT_AND_WAIT([ovn-controller]) ++ ++as ovn-sb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as ovn-nb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as northd ++OVS_APP_EXIT_AND_WAIT([ovn-northd]) ++ ++as ++OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d ++/connection dropped.*/d"]) ++AT_CLEANUP ++ ++ + AT_SETUP([ovn -- load-balancing]) + AT_KEYWORDS([ovnlb]) + +@@ -2405,6 +2722,230 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d + /connection dropped.*/d"]) + AT_CLEANUP + ++AT_SETUP([ovn -- multiple gateway routers, load-balancing - Dual Stack]) ++AT_KEYWORDS([ovnlb]) ++ ++CHECK_CONNTRACK() ++CHECK_CONNTRACK_NAT() ++ovn_start ++OVS_TRAFFIC_VSWITCHD_START() ++ADD_BR([br-int]) ++ ++# Set external-ids in br-int needed for ovn-controller ++ovs-vsctl \ ++ -- set Open_vSwitch . external-ids:system-id=hv1 \ ++ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ ++ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ ++ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ ++ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true ++ ++# Start ovn-controller ++start_daemon ovn-controller ++ ++# Logical network: ++# Three LRs - R1, R2 and R3 that are connected to each other via LS "join" ++# in 20.0.0.0/24 and fd20::/64 networks. R1 has switches foo (192.168.1.0/24 ++# and fd11::/64) and bar (192.168.2.0/24 and fd12::/64) connected to it. R2 ++# has alice (172.16.1.0/24 and fd72::/64) connected to it. 
R3 has bob ++# (172.16.1.0/24 and fd72::/64) connected to it. Note how both alice and ++# bob have the same subnets behind them. We are trying to simulate external ++# network via those 2 switches. In real world the switch ports of these ++# switches will have addresses set as "unknown" to make them learning switches. ++# Or those switches will be "localnet" ones. ++# ++# foo -- R1 -- join - R2 -- alice ++# | | ++# bar ---- - R3 --- bob ++ ++ovn-nbctl create Logical_Router name=R1 ++ovn-nbctl create Logical_Router name=R2 options:chassis=hv1 ++ovn-nbctl create Logical_Router name=R3 options:chassis=hv1 ++ ++ovn-nbctl ls-add foo ++ovn-nbctl ls-add bar ++ovn-nbctl ls-add alice ++ovn-nbctl ls-add bob ++ovn-nbctl ls-add join ++ ++# Connect foo to R1 ++ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 fd11::1/64 ++ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ ++ type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" ++ ++# Connect bar to R1 ++ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 fd12::1/64 ++ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ ++ type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" ++ ++# Connect alice to R2 ++ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 172.16.1.1/24 fd72::1/64 ++ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \ ++ type=router options:router-port=alice addresses=\"00:00:02:01:02:03\" ++ ++# Connect bob to R3 ++ovn-nbctl lrp-add R3 bob 00:00:03:01:02:03 172.16.1.2/24 fd72::2/64 ++ovn-nbctl lsp-add bob rp-bob -- set Logical_Switch_Port rp-bob \ ++ type=router options:router-port=bob addresses=\"00:00:03:01:02:03\" ++ ++# Connect R1 to join ++ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 20.0.0.1/24 fd20::1/64 ++ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \ ++ type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"' ++ ++# Connect R2 to join ++ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 
20.0.0.2/24 fd20::2/64 ++ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \ ++ type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"' ++ ++# Connect R3 to join ++ovn-nbctl lrp-add R3 R3_join 00:00:04:01:02:05 20.0.0.3/24 fd20::3/64 ++ovn-nbctl lsp-add join r3-join -- set Logical_Switch_Port r3-join \ ++ type=router options:router-port=R3_join addresses='"00:00:04:01:02:05"' ++ ++# Install static routes with source ip address as the policy for routing. ++# We want traffic from 'foo' to go via R2 and traffic of 'bar' to go via R3. ++ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.1.0/24 20.0.0.2 ++ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.2.0/24 20.0.0.3 ++ovn-nbctl --policy="src-ip" lr-route-add R1 fd11::/64 fd20::2 ++ovn-nbctl --policy="src-ip" lr-route-add R1 fd12::/64 fd20::3 ++ ++# Static routes. ++ovn-nbctl lr-route-add R2 192.168.0.0/16 20.0.0.1 ++ovn-nbctl lr-route-add R3 192.168.0.0/16 20.0.0.1 ++ovn-nbctl lr-route-add R2 fd11::/64 fd20::1 ++ovn-nbctl lr-route-add R2 fd12::/64 fd20::1 ++ovn-nbctl lr-route-add R3 fd11::/64 fd20::1 ++ovn-nbctl lr-route-add R3 fd12::/64 fd20::1 ++ ++# For gateway routers R2 and R3, set a force SNAT rule. ++ovn-nbctl set logical_router R2 options:lb_force_snat_ip="20.0.0.2 fd20::2" ++ovn-nbctl set logical_router R3 options:lb_force_snat_ip="20.0.0.3 fd20::3" ++ ++# Logical port 'foo1' in switch 'foo'. ++ADD_NAMESPACES(foo1) ++ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ ++ "192.168.1.1") ++ovn-nbctl lsp-add foo foo1 \ ++-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" ++ ++# Logical port 'foo16' in switch 'foo'. ++ADD_NAMESPACES(foo16) ++ADD_VETH(foo16, foo16, br-int, "fd11::2/64", "f0:00:06:01:02:03", \ ++ "fd11::1") ++ovn-nbctl lsp-add foo foo16 \ ++-- lsp-set-addresses foo16 "f0:00:06:01:02:03 fd11::2" ++ ++# Logical port 'alice1' in switch 'alice'. 
++ADD_NAMESPACES(alice1) ++ADD_VETH(alice1, alice1, br-int, "172.16.1.3/24", "f0:00:00:01:02:04", \ ++ "172.16.1.1") ++ovn-nbctl lsp-add alice alice1 \ ++-- lsp-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.3" ++ ++# Logical port 'alice16' in switch 'alice'. ++ADD_NAMESPACES(alice16) ++ADD_VETH(alice16, alice16, br-int, "fd72::3/64", "f0:00:06:01:02:04", \ ++ "fd72::1") ++ovn-nbctl lsp-add alice alice16 \ ++-- lsp-set-addresses alice16 "f0:00:06:01:02:04 fd72::3" ++ ++# Logical port 'bar1' in switch 'bar'. ++ADD_NAMESPACES(bar1) ++ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:05", \ ++"192.168.2.1") ++ovn-nbctl lsp-add bar bar1 \ ++-- lsp-set-addresses bar1 "f0:00:00:01:02:05 192.168.2.2" ++ ++# Logical port 'bar16' in switch 'bar'. ++ADD_NAMESPACES(bar16) ++ADD_VETH(bar16, bar16, br-int, "fd12::2/64", "f0:00:06:01:02:05", \ ++"fd12::1") ++ovn-nbctl lsp-add bar bar16 \ ++-- lsp-set-addresses bar16 "f0:00:06:01:02:05 fd12::2" ++ ++# Logical port 'bob1' in switch 'bob'. ++ADD_NAMESPACES(bob1) ++ADD_VETH(bob1, bob1, br-int, "172.16.1.4/24", "f0:00:00:01:02:06", \ ++ "172.16.1.2") ++ovn-nbctl lsp-add bob bob1 \ ++-- lsp-set-addresses bob1 "f0:00:00:01:02:06 172.16.1.4" ++ ++# Logical port 'bob16' in switch 'bob'. ++ADD_NAMESPACES(bob16) ++ADD_VETH(bob16, bob16, br-int, "fd72::4/64", "f0:00:06:01:02:06", \ ++ "fd72::2") ++ovn-nbctl lsp-add bob bob16 \ ++-- lsp-set-addresses bob16 "f0:00:06:01:02:06 fd72::4" ++ ++# Config OVN load-balancer with a VIP. ++uuid=`ovn-nbctl create load_balancer vips:30.0.0.1="192.168.1.2,192.168.2.2" \ ++vips:\"fd30::1\"=\"fd11::2,fd12::2\"` ++ovn-nbctl set logical_router R2 load_balancer=$uuid ++ovn-nbctl set logical_router R3 load_balancer=$uuid ++ ++# Wait for ovn-controller to catch up. 
++ovn-nbctl --wait=hv sync ++OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ ++grep 'nat(dst=192.168.2.2)']) ++OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ ++grep 'nat(dst=fd12::2)']) ++ ++# Start webservers in 'foo1', 'foo16', 'bar1', and 'bar16'. ++OVS_START_L7([foo1], [http]) ++OVS_START_L7([bar1], [http]) ++OVS_START_L7([foo16], [http6]) ++OVS_START_L7([bar16], [http6]) ++ ++dnl Should work with the virtual IP address through NAT ++for i in `seq 1 20`; do ++ echo Request $i ++ NS_CHECK_EXEC([alice1], [wget 30.0.0.1 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) ++done ++ ++for i in `seq 1 20`; do ++ echo Request ${i}_6 ++ NS_CHECK_EXEC([alice16], [wget http://[[fd30::1]] -t 5 -T 1 --retry-connrefused -v -o wget${i}_6.log]) ++done ++ ++dnl Each server should have at least one connection. ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.3,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.3,sport=,dport=),zone=,protoinfo=(state=) ++]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::1) | grep -v fe80 | ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::3,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::3,sport=,dport=),zone=,protoinfo=(state=) ++]) ++ ++dnl Force SNAT should have worked. 
++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0) | ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++tcp,orig=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),reply=(src=192.168.1.2,dst=20.0.0.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.3,dst=192.168.2.2,sport=,dport=),reply=(src=192.168.2.2,dst=20.0.0.2,sport=,dport=),zone=,protoinfo=(state=) ++]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::2) | grep -v fe80 | ++sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl ++tcp,orig=(src=fd72::3,dst=fd11::2,sport=,dport=),reply=(src=fd11::2,dst=fd20::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::3,dst=fd12::2,sport=,dport=),reply=(src=fd12::2,dst=fd20::2,sport=,dport=),zone=,protoinfo=(state=) ++]) ++OVS_APP_EXIT_AND_WAIT([ovn-controller]) ++ ++as ovn-sb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as ovn-nb ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++ ++as northd ++OVS_APP_EXIT_AND_WAIT([ovn-northd]) ++ ++as ++OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d ++/connection dropped.*/d"]) ++AT_CLEANUP ++ + AT_SETUP([ovn -- load balancing in router with gateway router port]) + AT_KEYWORDS([ovnlb]) + +-- +2.26.2 + diff --git a/SOURCES/0017-ovn-northd-Don-t-send-the-pkt-to-conntrack-if-it-is-.patch b/SOURCES/0017-ovn-northd-Don-t-send-the-pkt-to-conntrack-if-it-is-.patch new file mode 100644 index 0000000..8be01b7 --- /dev/null +++ b/SOURCES/0017-ovn-northd-Don-t-send-the-pkt-to-conntrack-if-it-is-.patch @@ -0,0 +1,187 @@ +From ac218b6e00ca97ddb54362d71435c4ea2d47cfb7 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Tue, 4 Aug 2020 12:49:34 +0530 +Subject: [PATCH 17/22] ovn-northd: Don't send the pkt to conntrack if it is to + be routed in egress stage. 
+ +If there is a logical port 'P1' with the IP - 10.0.0.3 and a logical port 'P2' with +the IP 20.0.0.3 and if the logical switch of 'P1' has at least one load balancer +associated with it and at least one ACL with allow-related action associated with it, +then for every packet from 'P1' to 'P2' after the TCP connection +is established we see a total of 4 recirculations in the datapath on the chassis +claiming 'P1'. This is because, + +In the ingress logical switch pipeline, below logical flows are hit + - table=9 (ls_in_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;) + - table=10(ls_in_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;) + +And in the egress logical switch pipeline, below logical flows are hit + - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[0] = 1; next;) + - table=2 (ls_out_pre_stateful), priority=100 , match=(reg0[0] == 1), action=(ct_next;) + - table=3 (ls_out_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;) + - table=7 (ls_out_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;) + +In the above example, when the packet enters the egress pipeline and since it needs to +enter the router pipeline, we can skip setting reg0[0] if the outport is the peer port of a +logical router port. There is no need to send the packet to conntrack in this case. + +This patch handles this case for router ports. The next patch in the series avoids sending to +conntrack with the action - ct_lb if the packet is not destined to the LB VIP. + +With the present master for the above example, we see a total of 4 recirculations on the +chassis claiming the lport 'P1'. With this patch we see only 2 recirculations. 
+ +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique +--- + northd/ovn-northd.8.xml | 33 ++++++++++++++++++++++++++++++++- + northd/ovn-northd.c | 39 ++++++++++++++++++++++++++++++--------- + 2 files changed, 62 insertions(+), 10 deletions(-) + +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index f35a035fd..e45d494e8 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -338,6 +338,15 @@ + db="OVN_Northbound"/> table. +

            + ++

            ++ This table also has a priority-110 flow with the match ++ inport == I for all logical switch ++ datapaths to move traffic to the next table. Where I ++ is the peer of a logical router port. This flow is added to ++ skip the connection tracking of packets which enter from ++ logical router datapath to logical switch datapath. ++

            ++ +

            Ingress Table 5: Pre-stateful

            + +

            +@@ -505,7 +514,20 @@ + +

            + It contains a priority-0 flow that simply moves traffic to the next +- table. For established connections a priority 100 flow matches on ++ table. ++

            ++ ++

            ++ A priority-65535 flow with the match ++ inport == I for all logical switch ++ datapaths to move traffic to the next table. Where I ++ is the peer of a logical router port. This flow is added to ++ skip the connection tracking of packets which enter from ++ logical router datapath to logical switch datapath. ++

            ++ ++

            ++ For established connections a priority 65534 flow matches on + ct.est && !ct.rel && !ct.new && + !ct.inv and sets an action reg0[2] = 1; next; to act + as a hint for table Stateful to send packets through +@@ -1342,6 +1364,15 @@ output; + db="OVN_Northbound"/> table. +

            + ++

            ++ This table also has a priority-110 flow with the match ++ outport == I for all logical switch ++ datapaths to move traffic to the next table. Where I ++ is the peer of a logical router port. This flow is added to ++ skip the connection tracking of packets which will be entering ++ logical router datapath from logical switch datapath for routing. ++

            ++ +

            Egress Table 2: Pre-stateful

            + +

            +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 1f5433d9d..7b534ce3c 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -4850,8 +4850,9 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths, + } + + static void +-build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op, +- struct hmap *lflows) ++skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op, ++ enum ovn_stage in_stage, enum ovn_stage out_stage, ++ uint16_t priority, struct hmap *lflows) + { + /* Can't use ct() for router ports. Consider the following configuration: + * lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a +@@ -4867,10 +4868,10 @@ build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op, + + ds_put_format(&match_in, "ip && inport == %s", op->json_key); + ds_put_format(&match_out, "ip && outport == %s", op->json_key); +- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ++ ovn_lflow_add_with_hint(lflows, od, in_stage, priority, + ds_cstr(&match_in), "next;", + &op->nbsp->header_); +- ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ++ ovn_lflow_add_with_hint(lflows, od, out_stage, priority, + ds_cstr(&match_out), "next;", + &op->nbsp->header_); + +@@ -4903,10 +4904,14 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) + * defragmentation, in order to match L4 headers. */ + if (has_stateful) { + for (size_t i = 0; i < od->n_router_ports; i++) { +- build_pre_acl_flows(od, od->router_ports[i], lflows); ++ skip_port_from_conntrack(od, od->router_ports[i], ++ S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL, ++ 110, lflows); + } + for (size_t i = 0; i < od->n_localnet_ports; i++) { +- build_pre_acl_flows(od, od->localnet_ports[i], lflows); ++ skip_port_from_conntrack(od, od->localnet_ports[i], ++ S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL, ++ 110, lflows); + } + + /* Ingress and Egress Pre-ACL Table (Priority 110). 
+@@ -5050,6 +5055,17 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;"); + ++ for (size_t i = 0; i < od->n_router_ports; i++) { ++ skip_port_from_conntrack(od, od->router_ports[i], ++ S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, ++ 110, lflows); ++ } ++ for (size_t i = 0; i < od->n_localnet_ports; i++) { ++ skip_port_from_conntrack(od, od->localnet_ports[i], ++ S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, ++ 110, lflows); ++ } ++ + struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); + struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); + bool vip_configured = false; +@@ -5725,13 +5741,18 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows) + ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); + + if (od->nbs->load_balancer) { +- /* Ingress and Egress LB Table (Priority 65535). ++ for (size_t i = 0; i < od->n_router_ports; i++) { ++ skip_port_from_conntrack(od, od->router_ports[i], ++ S_SWITCH_IN_LB, S_SWITCH_OUT_LB, ++ UINT16_MAX, lflows); ++ } ++ /* Ingress and Egress LB Table (Priority 65534). + * + * Send established traffic through conntrack for just NAT. 
*/ +- ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX, ++ ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1, + "ct.est && !ct.rel && !ct.new && !ct.inv", + REGBIT_CONNTRACK_NAT" = 1; next;"); +- ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX, ++ ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1, + "ct.est && !ct.rel && !ct.new && !ct.inv", + REGBIT_CONNTRACK_NAT" = 1; next;"); + } +-- +2.26.2 + diff --git a/SOURCES/0018-ovn-northd-Don-t-send-the-pkt-to-conntrack-for-NAT-i.patch b/SOURCES/0018-ovn-northd-Don-t-send-the-pkt-to-conntrack-for-NAT-i.patch new file mode 100644 index 0000000..17307a3 --- /dev/null +++ b/SOURCES/0018-ovn-northd-Don-t-send-the-pkt-to-conntrack-for-NAT-i.patch @@ -0,0 +1,475 @@ +From ad27f022b9bda5364f34c611ba116e0e18269f2c Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Tue, 4 Aug 2020 12:49:48 +0530 +Subject: [PATCH 18/22] ovn-northd: Don't send the pkt to conntrack for NAT if + its not destined for LB VIP. + +Presently when a logical switch has load balancer(s) associated to it, then the +packet is still sent to conntrack with the action ct_lb on both the ingress +and egress logical switch pipeline even if the destination IP is not LB VIP. + +This is because below logical flows are hit: + +In the ingress logical switch pipeline: + - table=9 (ls_in_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;) + - table=10(ls_in_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;) + +In the egress logical switch pipeline: + - table=3 (ls_out_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;) + - table=7 (ls_out_stateful), priority=100 , match=(reg0[2] == 1), action=(ct_lb;) + +This patch avoids unnecessary ct actions by setting the ct_label.natted to 1 when the ct_lb(backends=...) 
action +is applied for NEW connections and updating the above logical flows to check for this mark: + + - table=9 (ls_in_lb), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct_label.natted == 1), + action=(reg0[2] = 1; next;) + + - table=3 (ls_out_lb), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct_label.natted == 1), + action=(reg0[2] = 1; next;) + +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique +--- + lib/actions.c | 3 +- + lib/logical-fields.c | 6 +- + northd/ovn-northd.c | 6 +- + tests/ovn.at | 17 +++--- + tests/system-ovn.at | 130 +++++++++++++++++++++---------------------- + 5 files changed, 84 insertions(+), 78 deletions(-) + +diff --git a/lib/actions.c b/lib/actions.c +index 245486b0a..8c11b7b3f 100644 +--- a/lib/actions.c ++++ b/lib/actions.c +@@ -1098,7 +1098,8 @@ encode_CT_LB(const struct ovnact_ct_lb *cl, + if (dst->port) { + ds_put_format(&ds, ":%"PRIu16, dst->port); + } +- ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])", ++ ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15]," ++ "exec(set_field:2/2->ct_label))", + recirc_table, zone_reg); + } + +diff --git a/lib/logical-fields.c b/lib/logical-fields.c +index 15342dded..bf61df771 100644 +--- a/lib/logical-fields.c ++++ b/lib/logical-fields.c +@@ -126,10 +126,12 @@ ovn_init_symtab(struct shash *symtab) + expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false, + WR_CT_COMMIT); + +- expr_symtab_add_field_scoped(symtab, "ct_label", MFF_CT_LABEL, NULL, false, +- WR_CT_COMMIT); ++ expr_symtab_add_field_scoped(symtab, "ct_label", MFF_CT_LABEL, NULL, ++ false, WR_CT_COMMIT); + expr_symtab_add_subfield_scoped(symtab, "ct_label.blocked", NULL, + "ct_label[0]", WR_CT_COMMIT); ++ expr_symtab_add_subfield_scoped(symtab, "ct_label.natted", NULL, ++ "ct_label[1]", WR_CT_COMMIT); + expr_symtab_add_subfield_scoped(symtab, "ct_label.ecmp_reply_eth", NULL, + "ct_label[32..79]", WR_CT_COMMIT); + 
expr_symtab_add_subfield_scoped(symtab, "ct_label.ecmp_reply_port", NULL, +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 7b534ce3c..5f0abeee1 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -5750,10 +5750,12 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows) + * + * Send established traffic through conntrack for just NAT. */ + ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1, +- "ct.est && !ct.rel && !ct.new && !ct.inv", ++ "ct.est && !ct.rel && !ct.new && !ct.inv && " ++ "ct_label.natted == 1", + REGBIT_CONNTRACK_NAT" = 1; next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1, +- "ct.est && !ct.rel && !ct.new && !ct.inv", ++ "ct.est && !ct.rel && !ct.new && !ct.inv && " ++ "ct_label.natted == 1", + REGBIT_CONNTRACK_NAT" = 1; next;"); + } + } +diff --git a/tests/ovn.at b/tests/ovn.at +index 2651b3eac..0d99adf3f 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -194,6 +194,7 @@ ct_label = NXM_NX_CT_LABEL + ct_label.blocked = ct_label[0] + ct_label.ecmp_reply_eth = ct_label[32..79] + ct_label.ecmp_reply_port = ct_label[80..95] ++ct_label.natted = ct_label[1] + ct_mark = NXM_NX_CT_MARK + ct_state = NXM_NX_CT_STATE + ]]) +@@ -996,17 +997,17 @@ ct_lb(192.168.1.2:80, 192.168.1.3:80); + Syntax error at `192.168.1.2' expecting backends. 
+ ct_lb(backends=192.168.1.2:80,192.168.1.3:80); + encodes as group:1 +- uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + ct_lb(backends=192.168.1.2, 192.168.1.3, ); + formats as ct_lb(backends=192.168.1.2,192.168.1.3); + encodes as group:2 +- uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + ct_lb(backends=fd0f::2, fd0f::3, ); + formats as ct_lb(backends=fd0f::2,fd0f::3); + encodes as group:3 +- uses group: id(3), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(3), 
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + + ct_lb(backends=192.168.1.2:); +@@ -1022,23 +1023,23 @@ ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields=eth_src,eth_dst,ip_src + Syntax error at `eth_src' invalid hash_fields. + ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields="eth_src,eth_dst,ip_src"); + encodes as group:4 +- uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst"); + encodes as group:5 +- uses group: id(5), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(5), 
name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,tcp_src,tcp_dst"); + encodes as group:6 +- uses group: id(6), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(6), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,udp_src,udp_dst"); + encodes as group:7 +- uses group: id(7), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,udp_src,udp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(7), 
name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,udp_src,udp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,sctp_src,sctp_dst"); + encodes as group:8 +- uses group: id(8), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,sctp_src,sctp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) ++ uses group: id(8), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,sctp_src,sctp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label)),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15],exec(set_field:2/2->ct_label))) + has prereqs ip + + # ct_next +diff --git a/tests/system-ovn.at b/tests/system-ovn.at +index 0d478b4aa..d0ffb5c1a 100644 +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -1441,9 +1441,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Should work with the virtual IP 30.0.0.3 address through NAT +@@ -1455,9 +1455,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.3) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -1469,9 +1469,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + # Configure selection_fields. +@@ -1492,9 +1492,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +@@ -1687,9 +1687,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::1) | grep -v fe80 | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Should work with the virtual IP fd03::3 address through NAT +@@ -1701,9 +1701,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::3) | grep -v fe80 | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd01::2,dst=fd03::3,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::3,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::3,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::3,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::3,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::3,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -1715,9 +1715,9 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep -v fe80 | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + # Configure selection_fields. 
+@@ -1738,9 +1738,9 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep -v fe80 | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::2,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::3,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd02::4,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +@@ -1884,9 +1884,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.5,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.5,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -1898,9 +1898,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.3,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.4,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.5,dst=192.168.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.5,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + +@@ -1993,9 +1993,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::1) | grep -v fe80 | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd01::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd01::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd01::5,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd01::3,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd01::4,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::1,sport=,dport=),reply=(src=fd01::5,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -2007,9 +2007,9 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep -v fe80 | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd01::3,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd01::4,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd01::5,dst=fd01::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd01::3,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd01::4,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd01::2,dst=fd03::2,sport=,dport=),reply=(src=fd01::5,dst=fd01::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + +@@ -2145,8 +2145,8 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -2158,8 +2158,8 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + check_est_flows () { +@@ -2200,8 +2200,8 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=30.0.0.2,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.2) | +@@ -2349,8 +2349,8 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::1) | grep -v fe80 | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd72::2,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd72::2,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -2362,8 +2362,8 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::2) | grep -v fe80 | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd72::2,dst=fd30::2,sport=,dport=),reply=(src=fd11::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd72::2,dst=fd30::2,sport=,dport=),reply=(src=fd12::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd30::2,sport=,dport=),reply=(src=fd11::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd30::2,sport=,dport=),reply=(src=fd12::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + OVS_APP_EXIT_AND_WAIT([ovn-controller]) +@@ -2525,8 +2525,8 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.3,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.3,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Force SNAT should have worked. +@@ -2696,8 +2696,8 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::1) | grep -v fe80 | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::3,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::3,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Force SNAT should have worked. +@@ -2910,13 +2910,13 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.3,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.3,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd30::1) | grep -v fe80 | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::3,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::3,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd11::2,dst=fd72::3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd72::3,dst=fd30::1,sport=,dport=),reply=(src=fd12::2,dst=fd72::3,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Force SNAT should have worked. +@@ -3054,8 +3054,8 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.10) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.2,dst=172.16.1.10,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.2,dst=172.16.1.10,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=172.16.1.10,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=172.16.1.10,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -3067,8 +3067,8 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.11) | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=172.16.1.2,dst=172.16.1.11,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=172.16.1.2,dst=172.16.1.11,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=172.16.1.11,sport=,dport=),reply=(src=192.168.1.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=172.16.1.2,dst=172.16.1.11,sport=,dport=),reply=(src=192.168.2.2,dst=172.16.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + OVS_APP_EXIT_AND_WAIT([ovn-controller]) +@@ -3195,8 +3195,8 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd72::10) | grep -v fe80 | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd72::2,dst=fd72::10,sport=,dport=),reply=(src=fd01::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd72::2,dst=fd72::10,sport=,dport=),reply=(src=fd02::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd72::10,sport=,dport=),reply=(src=fd01::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd72::10,sport=,dport=),reply=(src=fd02::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + dnl Test load-balancing that includes L4 ports in NAT. +@@ -3208,8 +3208,8 @@ done + dnl Each server should have at least one connection. + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd72::11) | grep -v fe80 | + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=fd72::2,dst=fd72::11,sport=,dport=),reply=(src=fd01::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=fd72::2,dst=fd72::11,sport=,dport=),reply=(src=fd02::2,dst=fd72::2,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd72::11,sport=,dport=),reply=(src=fd01::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=fd72::2,dst=fd72::11,sport=,dport=),reply=(src=fd02::2,dst=fd72::2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + OVS_APP_EXIT_AND_WAIT([ovn-controller]) +@@ -4207,8 +4207,8 @@ done + dnl Each server should have at least one connection. 
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.0.0.10) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=10.0.0.4,dst=10.0.0.10,sport=,dport=),reply=(src=10.0.0.3,dst=10.0.0.4,sport=,dport=),zone=,protoinfo=(state=) +-tcp,orig=(src=10.0.0.4,dst=10.0.0.10,sport=,dport=),reply=(src=20.0.0.3,dst=10.0.0.4,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=10.0.0.4,dst=10.0.0.10,sport=,dport=),reply=(src=10.0.0.3,dst=10.0.0.4,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) ++tcp,orig=(src=10.0.0.4,dst=10.0.0.10,sport=,dport=),reply=(src=20.0.0.3,dst=10.0.0.4,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + # Stop webserer in sw0-p1 +@@ -4232,7 +4232,7 @@ done + + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.0.0.10) | \ + sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl +-tcp,orig=(src=10.0.0.4,dst=10.0.0.10,sport=,dport=),reply=(src=20.0.0.3,dst=10.0.0.4,sport=,dport=),zone=,protoinfo=(state=) ++tcp,orig=(src=10.0.0.4,dst=10.0.0.10,sport=,dport=),reply=(src=20.0.0.3,dst=10.0.0.4,sport=,dport=),zone=,labels=0x2,protoinfo=(state=) + ]) + + # Create udp load balancer. +-- +2.26.2 + diff --git a/SOURCES/0019-ovsdb-idl-Add-function-to-reset-min_index.patch b/SOURCES/0019-ovsdb-idl-Add-function-to-reset-min_index.patch new file mode 100644 index 0000000..e85d3ac --- /dev/null +++ b/SOURCES/0019-ovsdb-idl-Add-function-to-reset-min_index.patch @@ -0,0 +1,68 @@ +From 3ad242eef272a8cc31a18a1b047ba7d54e861b0d Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Fri, 1 May 2020 15:13:08 -0400 +Subject: [PATCH 19/22] ovsdb-idl: Add function to reset min_index. + +If an administrator removes all of the databases in a cluster from +disk, then ovsdb IDL clients will have a problem. The databases will all +reset their stored indexes to 0, so the IDL client's min_index will be +higher than the indexes of all databases in the cluster.
This results in +the client constantly connecting to databases, detecting the data as +"stale", and then attempting to connect to another. + +This function provides a way to reset the IDL to an initial state with +min_index of 0. This way, the client will not wrongly detect the +database data as stale and will recover properly. + +Notice that this function is not actually used anywhere in this patch. +This will be used by OVN, though, since OVN is the primary user of +clustered OVSDB. + +Signed-off-by: Mark Michelson +Acked-by: Han Zhou +Signed-off-by: Ilya Maximets + +(cherry-picked from upstream ovs commit 89b522aee379f7ebd21ec67ffb622118af7e9db1) + +Change-Id: I943ece9a07566a34b11248455cc1abbe7892d4e8 +--- + openvswitch-2.13.0/lib/ovsdb-idl.c | 10 ++++++++++ + openvswitch-2.13.0/lib/ovsdb-idl.h | 1 + + 2 files changed, 11 insertions(+) + +diff --git a/openvswitch-2.13.0/lib/ovsdb-idl.c b/openvswitch-2.13.0/lib/ovsdb-idl.c +index 8eb421366..648c227d6 100644 +--- a/openvswitch-2.13.0/lib/ovsdb-idl.c ++++ b/openvswitch-2.13.0/lib/ovsdb-idl.c +@@ -561,6 +561,16 @@ ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *idl, bool shuffle) + idl->shuffle_remotes = shuffle; + } + ++/* Reset min_index to 0. This prevents a situation where the client ++ * thinks all databases have stale data, when they actually have all ++ * been destroyed and rebuilt from scratch. 
++ */ ++void ++ovsdb_idl_reset_min_index(struct ovsdb_idl *idl) ++{ ++ idl->min_index = 0; ++} ++ + static void + ovsdb_idl_db_destroy(struct ovsdb_idl_db *db) + { +diff --git a/openvswitch-2.13.0/lib/ovsdb-idl.h b/openvswitch-2.13.0/lib/ovsdb-idl.h +index 9f12ce320..c56cd19b1 100644 +--- a/openvswitch-2.13.0/lib/ovsdb-idl.h ++++ b/openvswitch-2.13.0/lib/ovsdb-idl.h +@@ -64,6 +64,7 @@ struct ovsdb_idl *ovsdb_idl_create_unconnected( + const struct ovsdb_idl_class *, bool monitor_everything_by_default); + void ovsdb_idl_set_remote(struct ovsdb_idl *, const char *, bool); + void ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *, bool); ++void ovsdb_idl_reset_min_index(struct ovsdb_idl *); + void ovsdb_idl_destroy(struct ovsdb_idl *); + + void ovsdb_idl_set_leader_only(struct ovsdb_idl *, bool leader_only); +-- +2.26.2 + diff --git a/SOURCES/0020-Add-northd-and-ovn-controller-cluster-status-reset-c.patch b/SOURCES/0020-Add-northd-and-ovn-controller-cluster-status-reset-c.patch new file mode 100644 index 0000000..73d6138 --- /dev/null +++ b/SOURCES/0020-Add-northd-and-ovn-controller-cluster-status-reset-c.patch @@ -0,0 +1,246 @@ +From 8e629246fead7a5dfee78ca45627a0cfd7fef4b1 Mon Sep 17 00:00:00 2001 +From: Mark Michelson +Date: Wed, 10 Jun 2020 14:50:06 -0400 +Subject: [PATCH 20/22] Add northd and ovn-controller cluster status reset + commands. + +During the course of debugging a clustered DB environment, all members +of the southbound database cluster were destroyed (i.e. the .db files +were removed from disk) and then restarted. Once this happened, +ovn-northd and ovn-controller could not interact with the southbound +database because they both detected all members of the cluster as having +"stale" data. The only course of action was to reset ovn-northd and all +ovn-controllers. It is possible to have this happen with the northbound +database as well if it is clustered. 
+ +This patch offers new ovn-appctl commands for ovn-northd and +ovn-controller that allow them to reset their clustered status. This +allows them to interact with the database successfully after a cluster +teardown and restart. + +Signed-off-by: Mark Michelson +Acked-by: Han Zhou +Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1829109 + +(cherry-picked from upstream ovn master commit 512b884dea3f85791eca44fd1d92956e8282be6d) + +Change-Id: I3e7d844d6f79552fd53a018c74b80def6069edcb +--- + controller/ovn-controller.8.xml | 16 ++++++++++++++++ + controller/ovn-controller.c | 30 ++++++++++++++++++++++++++--- + northd/ovn-northd.8.xml | 28 +++++++++++++++++++++++++++ + northd/ovn-northd.c | 34 +++++++++++++++++++++++++++++++++ + 4 files changed, 105 insertions(+), 3 deletions(-) + +diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml +index 92e0a6e43..66877314c 100644 +--- a/controller/ovn-controller.8.xml ++++ b/controller/ovn-controller.8.xml +@@ -491,6 +491,22 @@ + recomputes are cpu intensive. +

            + ++ ++
            sb-cluster-state-reset
            ++
            ++

            ++ Reset southbound database cluster status when databases are destroyed ++ and rebuilt. ++

            ++

            ++ If all databases in a clustered southbound database are removed from ++ disk, then the stored index of all databases will be reset to zero. ++ This will cause ovn-controller to be unable to read or write to the ++ southbound database, because it will always detect the data as stale. ++ In such a case, run this command so that ovn-controller will reset its ++ local index so that it can interact with the southbound database again. ++

            ++
            +
            +

            + +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 85e58d04f..fe6048153 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -72,6 +72,7 @@ static unixctl_cb_func ct_zone_list; + static unixctl_cb_func extend_table_list; + static unixctl_cb_func inject_pkt; + static unixctl_cb_func engine_recompute_cmd; ++static unixctl_cb_func cluster_state_reset_cmd; + + #define DEFAULT_BRIDGE_NAME "br-int" + #define DEFAULT_PROBE_INTERVAL_MSEC 5000 +@@ -445,7 +446,7 @@ get_ofctrl_probe_interval(struct ovsdb_idl *ovs_idl) + * updates 'sbdb_idl' with that pointer. */ + static void + update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, +- bool *monitor_all_p) ++ bool *monitor_all_p, bool *reset_ovnsb_idl_min_index) + { + const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl); + if (!cfg) { +@@ -475,6 +476,12 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, + if (monitor_all_p) { + *monitor_all_p = monitor_all; + } ++ if (*reset_ovnsb_idl_min_index) { ++ VLOG_INFO("Resetting southbound database cluster state"); ++ engine_set_force_recompute(true); ++ ovsdb_idl_reset_min_index(ovnsb_idl); ++ *reset_ovnsb_idl_min_index = false; ++ } + } + + static void +@@ -2287,6 +2294,11 @@ main(int argc, char *argv[]) + unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd, + NULL); + ++ bool reset_ovnsb_idl_min_index = false; ++ unixctl_command_register("sb-cluster-state-reset", "", 0, 0, ++ cluster_state_reset_cmd, ++ &reset_ovnsb_idl_min_index); ++ + unsigned int ovs_cond_seqno = UINT_MAX; + unsigned int ovnsb_cond_seqno = UINT_MAX; + +@@ -2308,7 +2320,8 @@ main(int argc, char *argv[]) + ovs_cond_seqno = new_ovs_cond_seqno; + } + +- update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, &sb_monitor_all); ++ update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, &sb_monitor_all, ++ &reset_ovnsb_idl_min_index); + 
update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); + ofctrl_set_probe_interval(get_ofctrl_probe_interval(ovs_idl_loop.idl)); + +@@ -2558,7 +2571,7 @@ main(int argc, char *argv[]) + if (!restart) { + bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl); + while (!done) { +- update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, NULL); ++ update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, NULL, false); + update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); + + struct ovsdb_idl_txn *ovs_idl_txn +@@ -2780,3 +2793,14 @@ engine_recompute_cmd(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, + poll_immediate_wake(); + unixctl_command_reply(conn, NULL); + } ++ ++static void ++cluster_state_reset_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *idl_reset_) ++{ ++ bool *idl_reset = idl_reset_; ++ ++ *idl_reset = true; ++ poll_immediate_wake(); ++ unixctl_command_reply(conn, NULL); ++} +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index e45d494e8..989e3643b 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -96,6 +96,34 @@ + acquired OVSDB lock on SB DB, "standby" if it has not or "paused" if + this instance is paused. + ++ ++
            sb-cluster-state-reset
            ++
            ++

            ++ Reset southbound database cluster status when databases are destroyed ++ and rebuilt. ++

            ++

            ++ If all databases in a clustered southbound database are removed from ++ disk, then the stored index of all databases will be reset to zero. ++ This will cause ovn-northd to be unable to read or write to the ++ southbound database, because it will always detect the data as stale. ++ In such a case, run this command so that ovn-northd will reset its ++ local index so that it can interact with the southbound database again. ++

            ++
            ++ ++
            nb-cluster-state-reset
            ++
            ++

            ++ Reset northbound database cluster status when databases are destroyed ++ and rebuilt. ++

            ++

            ++ This performs the same task as sb-cluster-state-reset ++ except for the northbound database client. ++

            ++
            + +

            + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 5f0abeee1..fc05accde 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -56,6 +56,7 @@ static unixctl_cb_func ovn_northd_pause; + static unixctl_cb_func ovn_northd_resume; + static unixctl_cb_func ovn_northd_is_paused; + static unixctl_cb_func ovn_northd_status; ++static unixctl_cb_func cluster_state_reset_cmd; + + struct northd_context { + struct ovsdb_idl *ovnnb_idl; +@@ -12393,6 +12394,16 @@ main(int argc, char *argv[]) + &state); + unixctl_command_register("status", "", 0, 0, ovn_northd_status, &state); + ++ bool reset_ovnsb_idl_min_index = false; ++ unixctl_command_register("sb-cluster-state-reset", "", 0, 0, ++ cluster_state_reset_cmd, ++ &reset_ovnsb_idl_min_index); ++ ++ bool reset_ovnnb_idl_min_index = false; ++ unixctl_command_register("nb-cluster-state-reset", "", 0, 0, ++ cluster_state_reset_cmd, ++ &reset_ovnnb_idl_min_index); ++ + daemonize_complete(); + + /* We want to detect (almost) all changes to the ovn-nb db. 
*/ +@@ -12684,6 +12695,18 @@ main(int argc, char *argv[]) + ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl, northd_probe_interval); + ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl, northd_probe_interval); + ++ if (reset_ovnsb_idl_min_index) { ++ VLOG_INFO("Resetting southbound database cluster state"); ++ ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl); ++ reset_ovnsb_idl_min_index = false; ++ } ++ ++ if (reset_ovnnb_idl_min_index) { ++ VLOG_INFO("Resetting northbound database cluster state"); ++ ovsdb_idl_reset_min_index(ovnnb_idl_loop.idl); ++ reset_ovnnb_idl_min_index = false; ++ } ++ + poll_block(); + if (should_service_stop()) { + exiting = true; +@@ -12762,3 +12785,14 @@ ovn_northd_status(struct unixctl_conn *conn, int argc OVS_UNUSED, + unixctl_command_reply(conn, ds_cstr(&s)); + ds_destroy(&s); + } ++ ++static void ++cluster_state_reset_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED, ++ const char *argv[] OVS_UNUSED, void *idl_reset_) ++{ ++ bool *idl_reset = idl_reset_; ++ ++ *idl_reset = true; ++ poll_immediate_wake(); ++ unixctl_command_reply(conn, NULL); ++} +-- +2.26.2 + diff --git a/SOURCES/0021-Fix-compilation-error-when-configured-with-enable-sp.patch b/SOURCES/0021-Fix-compilation-error-when-configured-with-enable-sp.patch new file mode 100644 index 0000000..17b4266 --- /dev/null +++ b/SOURCES/0021-Fix-compilation-error-when-configured-with-enable-sp.patch @@ -0,0 +1,49 @@ +From a426e7292fc6a8c9d37e60abd0ff58eac983b18e Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Thu, 11 Jun 2020 12:47:08 +0530 +Subject: [PATCH 21/22] Fix compilation error when configured with + --enable-sparse. + +The below error is seen. + +../controller/ovn-controller.c:2305:70: error: Using plain integer as NULL pointer +make[1]: *** [Makefile:2000: controller/ovn-controller.o] Error 1 +make[1]: *** Waiting for unfinished jobs.... 
+ +Fixes: 512b884dea3f("Add northd and ovn-controller cluster status reset commands.") +CC: Mark Michelson +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique + +(cherry-picked from upstream ovn master commit 10deb869fff9f0b6736523cf3bdf0d856035895b) + +Change-Id: Iaff476538e85a9d68a1269cd55963edb5275dbe3 +--- + controller/ovn-controller.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index fe6048153..67b3cd989 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -476,7 +476,7 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, + if (monitor_all_p) { + *monitor_all_p = monitor_all; + } +- if (*reset_ovnsb_idl_min_index) { ++ if (reset_ovnsb_idl_min_index && *reset_ovnsb_idl_min_index) { + VLOG_INFO("Resetting southbound database cluster state"); + engine_set_force_recompute(true); + ovsdb_idl_reset_min_index(ovnsb_idl); +@@ -2571,7 +2571,7 @@ main(int argc, char *argv[]) + if (!restart) { + bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl); + while (!done) { +- update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, NULL, false); ++ update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, NULL, NULL); + update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); + + struct ovsdb_idl_txn *ovs_idl_txn +-- +2.26.2 + diff --git a/SOURCES/0022-Avoid-nb_cfg-update-notification-flooding.patch b/SOURCES/0022-Avoid-nb_cfg-update-notification-flooding.patch new file mode 100644 index 0000000..184e0b3 --- /dev/null +++ b/SOURCES/0022-Avoid-nb_cfg-update-notification-flooding.patch @@ -0,0 +1,565 @@ +From ddbdb15a4b6050d9c667ca2bc546a118e208a342 Mon Sep 17 00:00:00 2001 +From: Han Zhou +Date: Thu, 30 Jul 2020 23:18:58 -0700 +Subject: [PATCH 22/22] Avoid nb_cfg update notification flooding + +nb_cfg as a mechanism to "ping" OVN control plane is very useful +in many ways. 
However, the current implementation will trigger +update notifications flooding in the whole control plane. Each +HV updates to SB the nb_cfg number and all these updates are +notified to all the other HVs, which is O(n^2). Although updates +are batched in fewer notifications than n^2, it still generates +significant load on SB DB and ovn-controllers. + +To solve this problem and make the mechanism more useful in large +scale production deployment, this patch separates the per HV +*private* data (write only by the owning chassis and not +interesting to any other HVs) from the Chassis table to a separate +table, so that each HV can conditionally monitor and get updates +only for its own record. + +Test result shows great improvement: +In a test environment with 1200 sandbox HVs, and 12K ports created +on 80 lswitches and 1 lrouter, do the sync test when the system +is idle, with command: + + time ovn-nbctl --wait=hv sync + +Original result: +real 0m13.724s +user 0m0.295s +sys 0m0.012s + +With this patch: +real 0m3.255s +user 0m0.248s +sys 0m0.020s + +Also, regarding backwards compatibility note that the nb_cfg from the +Chassis table is no longer updated. If any system is relying on this +mechanism they should start using the nb_cfg from the Chassis_Private +table from now on.
+ +Change-Id: I9be2449f3317ff6b91d9afc8f53a9caa8e14c062 +Co-authored-by: Lucas Alvares Gomes +Signed-off-by: Lucas Alvares Gomes +Signed-off-by: Han Zhou +Acked-by: Dumitru Ceara + +(cherry-picked from upstream master commit 4adc10f58127e45b5883f2e7cb1c702720b95043) +--- + controller/chassis.c | 30 ++++++++++++++++++++---- + controller/chassis.h | 8 +++++-- + controller/ovn-controller.c | 42 ++++++++++++++++++++++++++++----- + lib/chassis-index.c | 26 +++++++++++++++++++++ + lib/chassis-index.h | 6 +++++ + northd/ovn-northd.c | 46 +++++++++++++++++++++++++++++++------ + ovn-sb.ovsschema | 17 ++++++++++++-- + ovn-sb.xml | 42 +++++++++++++++++++++++++++++---- + tests/ovn-controller.at | 26 +++++++++++++++++++++ + 9 files changed, 218 insertions(+), 25 deletions(-) + +diff --git a/controller/chassis.c b/controller/chassis.c +index bdf3fb950..6ac591e02 100644 +--- a/controller/chassis.c ++++ b/controller/chassis.c +@@ -621,14 +621,18 @@ chassis_update(const struct sbrec_chassis *chassis_rec, + const struct sbrec_chassis * + chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + struct ovsdb_idl_index *sbrec_chassis_by_name, ++ struct ovsdb_idl_index *sbrec_chassis_private_by_name, + const struct ovsrec_open_vswitch_table *ovs_table, + const struct sbrec_chassis_table *chassis_table, + const char *chassis_id, + const struct ovsrec_bridge *br_int, +- const struct sset *transport_zones) ++ const struct sset *transport_zones, ++ const struct sbrec_chassis_private **chassis_private) + { + struct ovs_chassis_cfg ovs_cfg; + ++ *chassis_private = NULL; ++ + /* Get the chassis config from the ovs table. */ + ovs_chassis_cfg_init(&ovs_cfg); + if (!chassis_parse_ovs_config(ovs_table, br_int, &ovs_cfg)) { +@@ -655,6 +659,18 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + !existed ? 
"registering" : "updating", + chassis_id); + } ++ ++ const struct sbrec_chassis_private *chassis_private_rec = ++ chassis_private_lookup_by_name(sbrec_chassis_private_by_name, ++ chassis_id); ++ if (!chassis_private_rec && ovnsb_idl_txn) { ++ chassis_private_rec = sbrec_chassis_private_insert(ovnsb_idl_txn); ++ sbrec_chassis_private_set_name(chassis_private_rec, ++ chassis_id); ++ sbrec_chassis_private_set_chassis(chassis_private_rec, ++ chassis_rec); ++ } ++ *chassis_private = chassis_private_rec; + } + + ovs_chassis_cfg_destroy(&ovs_cfg); +@@ -710,16 +726,22 @@ chassis_get_mac(const struct sbrec_chassis *chassis_rec, + * required. */ + bool + chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, +- const struct sbrec_chassis *chassis_rec) ++ const struct sbrec_chassis *chassis_rec, ++ const struct sbrec_chassis_private *chassis_private_rec) + { +- if (!chassis_rec) { ++ if (!chassis_rec && !chassis_private_rec) { + return true; + } + if (ovnsb_idl_txn) { + ovsdb_idl_txn_add_comment(ovnsb_idl_txn, + "ovn-controller: unregistering chassis '%s'", + chassis_rec->name); +- sbrec_chassis_delete(chassis_rec); ++ if (chassis_rec) { ++ sbrec_chassis_delete(chassis_rec); ++ } ++ if (chassis_private_rec) { ++ sbrec_chassis_private_delete(chassis_private_rec); ++ } + } + return false; + } +diff --git a/controller/chassis.h b/controller/chassis.h +index 178d2957e..81055b403 100644 +--- a/controller/chassis.h ++++ b/controller/chassis.h +@@ -17,6 +17,7 @@ + #define OVN_CHASSIS_H 1 + + #include ++#include "lib/ovn-sb-idl.h" + + struct ovsdb_idl; + struct ovsdb_idl_index; +@@ -33,12 +34,15 @@ void chassis_register_ovs_idl(struct ovsdb_idl *); + const struct sbrec_chassis *chassis_run( + struct ovsdb_idl_txn *ovnsb_idl_txn, + struct ovsdb_idl_index *sbrec_chassis_by_name, ++ struct ovsdb_idl_index *sbrec_chassis_private_by_name, + const struct ovsrec_open_vswitch_table *, + const struct sbrec_chassis_table *, + const char *chassis_id, const struct ovsrec_bridge *br_int, +- const 
struct sset *transport_zones); ++ const struct sset *transport_zones, ++ const struct sbrec_chassis_private **chassis_private); + bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, +- const struct sbrec_chassis *); ++ const struct sbrec_chassis *, ++ const struct sbrec_chassis_private *); + bool chassis_get_mac(const struct sbrec_chassis *chassis, + const char *bridge_mapping, + struct eth_addr *chassis_mac); +diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c +index 67b3cd989..933acf676 100644 +--- a/controller/ovn-controller.c ++++ b/controller/ovn-controller.c +@@ -155,6 +155,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + struct ovsdb_idl_condition ce = OVSDB_IDL_CONDITION_INIT(&ce); + struct ovsdb_idl_condition ip_mcast = OVSDB_IDL_CONDITION_INIT(&ip_mcast); + struct ovsdb_idl_condition igmp = OVSDB_IDL_CONDITION_INIT(&igmp); ++ struct ovsdb_idl_condition chprv = OVSDB_IDL_CONDITION_INIT(&chprv); + + if (monitor_all) { + ovsdb_idl_condition_add_clause_true(&pb); +@@ -165,6 +166,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + ovsdb_idl_condition_add_clause_true(&ce); + ovsdb_idl_condition_add_clause_true(&ip_mcast); + ovsdb_idl_condition_add_clause_true(&igmp); ++ ovsdb_idl_condition_add_clause_true(&chprv); + goto out; + } + +@@ -196,7 +198,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, + &chassis->header_.uuid); + sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ, + &chassis->header_.uuid); ++ ++ /* Monitors Chassis_Private record for current chassis only */ ++ sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ, ++ chassis->name); ++ } else { ++ /* During initialization, we monitor all records in Chassis_Private so ++ * that we don't try to recreate existing ones. 
*/ ++ ovsdb_idl_condition_add_clause_true(&chprv); + } ++ + if (local_ifaces) { + const char *name; + SSET_FOR_EACH (name, local_ifaces) { +@@ -229,6 +240,7 @@ out: + sbrec_controller_event_set_condition(ovnsb_idl, &ce); + sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast); + sbrec_igmp_group_set_condition(ovnsb_idl, &igmp); ++ sbrec_chassis_private_set_condition(ovnsb_idl, &chprv); + ovsdb_idl_condition_destroy(&pb); + ovsdb_idl_condition_destroy(&lf); + ovsdb_idl_condition_destroy(&mb); +@@ -237,6 +249,7 @@ out: + ovsdb_idl_condition_destroy(&ce); + ovsdb_idl_condition_destroy(&ip_mcast); + ovsdb_idl_condition_destroy(&igmp); ++ ovsdb_idl_condition_destroy(&chprv); + } + + static const char * +@@ -2090,6 +2103,8 @@ main(int argc, char *argv[]) + + struct ovsdb_idl_index *sbrec_chassis_by_name + = chassis_index_create(ovnsb_idl_loop.idl); ++ struct ovsdb_idl_index *sbrec_chassis_private_by_name ++ = chassis_private_index_create(ovnsb_idl_loop.idl); + struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath + = mcast_group_index_create(ovnsb_idl_loop.idl); + struct ovsdb_idl_index *sbrec_logical_flow_by_logical_datapath +@@ -2118,7 +2133,8 @@ main(int argc, char *argv[]) + = igmp_group_index_create(ovnsb_idl_loop.idl); + + ovsdb_idl_track_add_all(ovnsb_idl_loop.idl); +- ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg); ++ ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, ++ &sbrec_chassis_private_col_nb_cfg); + + /* Omit the external_ids column of all the tables except for - + * - DNS. 
pinctrl.c uses the external_ids column of DNS, +@@ -2155,6 +2171,10 @@ main(int argc, char *argv[]) + * other_config column so we no longer need to monitor it */ + ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_external_ids); + ++ /* Do not monitor Chassis_Private external_ids */ ++ ovsdb_idl_omit(ovnsb_idl_loop.idl, ++ &sbrec_chassis_private_col_external_ids); ++ + update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL, false); + + stopwatch_create(CONTROLLER_LOOP_STOPWATCH_NAME, SW_MS); +@@ -2361,10 +2381,13 @@ main(int argc, char *argv[]) + process_br_int(ovs_idl_txn, bridge_table, ovs_table); + const char *chassis_id = get_ovs_chassis_id(ovs_table); + const struct sbrec_chassis *chassis = NULL; ++ const struct sbrec_chassis_private *chassis_private = NULL; + if (chassis_id) { + chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name, ++ sbrec_chassis_private_by_name, + ovs_table, chassis_table, chassis_id, +- br_int, &transport_zones); ++ br_int, &transport_zones, ++ &chassis_private); + } + + if (br_int) { +@@ -2489,10 +2512,10 @@ main(int argc, char *argv[]) + engine_set_force_recompute(false); + } + +- if (ovnsb_idl_txn && chassis) { ++ if (ovnsb_idl_txn && chassis_private) { + int64_t cur_cfg = ofctrl_get_cur_cfg(); +- if (cur_cfg && cur_cfg != chassis->nb_cfg) { +- sbrec_chassis_set_nb_cfg(chassis, cur_cfg); ++ if (cur_cfg && cur_cfg != chassis_private->nb_cfg) { ++ sbrec_chassis_private_set_nb_cfg(chassis_private, cur_cfg); + } + } + +@@ -2595,10 +2618,17 @@ main(int argc, char *argv[]) + ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id) + : NULL); + ++ const struct sbrec_chassis_private *chassis_private ++ = (chassis_id ++ ? chassis_private_lookup_by_name( ++ sbrec_chassis_private_by_name, chassis_id) ++ : NULL); ++ + /* Run all of the cleanup functions, even if one of them returns + * false. We're done if all of them return true. 
*/ + done = binding_cleanup(ovnsb_idl_txn, port_binding_table, chassis); +- done = chassis_cleanup(ovnsb_idl_txn, chassis) && done; ++ done = chassis_cleanup(ovnsb_idl_txn, ++ chassis, chassis_private) && done; + done = encaps_cleanup(ovs_idl_txn, br_int) && done; + done = igmp_group_cleanup(ovnsb_idl_txn, sbrec_igmp_group) && done; + if (done) { +diff --git a/lib/chassis-index.c b/lib/chassis-index.c +index 39066f4cc..13120fe3e 100644 +--- a/lib/chassis-index.c ++++ b/lib/chassis-index.c +@@ -40,6 +40,32 @@ chassis_lookup_by_name(struct ovsdb_idl_index *sbrec_chassis_by_name, + return retval; + } + ++struct ovsdb_idl_index * ++chassis_private_index_create(struct ovsdb_idl *idl) ++{ ++ return ovsdb_idl_index_create1(idl, ++ &sbrec_chassis_private_col_name); ++} ++ ++/* Finds and returns the chassis with the given 'name', or NULL if no such ++ * chassis exists. */ ++const struct sbrec_chassis_private * ++chassis_private_lookup_by_name( ++ struct ovsdb_idl_index *sbrec_chassis_private_by_name, ++ const char *name) ++{ ++ struct sbrec_chassis_private *target = ++ sbrec_chassis_private_index_init_row(sbrec_chassis_private_by_name); ++ sbrec_chassis_private_index_set_name(target, name); ++ ++ struct sbrec_chassis_private *retval = sbrec_chassis_private_index_find( ++ sbrec_chassis_private_by_name, target); ++ ++ sbrec_chassis_private_index_destroy_row(target); ++ ++ return retval; ++} ++ + struct ovsdb_idl_index * + ha_chassis_group_index_create(struct ovsdb_idl *idl) + { +diff --git a/lib/chassis-index.h b/lib/chassis-index.h +index 302e5f0fd..b9b331f34 100644 +--- a/lib/chassis-index.h ++++ b/lib/chassis-index.h +@@ -23,6 +23,12 @@ struct ovsdb_idl_index *chassis_index_create(struct ovsdb_idl *); + const struct sbrec_chassis *chassis_lookup_by_name( + struct ovsdb_idl_index *sbrec_chassis_by_name, const char *name); + ++struct ovsdb_idl_index *chassis_private_index_create(struct ovsdb_idl *); ++ ++const struct sbrec_chassis_private * ++chassis_private_lookup_by_name( 
++ struct ovsdb_idl_index *sbrec_chassis_private_by_name, const char *name); ++ + struct ovsdb_idl_index *ha_chassis_group_index_create(struct ovsdb_idl *idl); + const struct sbrec_ha_chassis_group *ha_chassis_group_lookup_by_name( + struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, const char *name); +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index fc05accde..c83f9d5c2 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -12024,6 +12024,11 @@ static const char *rbac_chassis_update[] = + {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches", + "other_config"}; + ++static const char *rbac_chassis_private_auth[] = ++ {"name"}; ++static const char *rbac_chassis_private_update[] = ++ {"nb_cfg", "chassis"}; ++ + static const char *rbac_encap_auth[] = + {"chassis_name"}; + static const char *rbac_encap_update[] = +@@ -12061,6 +12066,14 @@ static struct rbac_perm_cfg { + .update = rbac_chassis_update, + .n_update = ARRAY_SIZE(rbac_chassis_update), + .row = NULL ++ },{ ++ .table = "Chassis_Private", ++ .auth = rbac_chassis_private_auth, ++ .n_auth = ARRAY_SIZE(rbac_chassis_private_auth), ++ .insdel = true, ++ .update = rbac_chassis_private_update, ++ .n_update = ARRAY_SIZE(rbac_chassis_private_update), ++ .row = NULL + },{ + .table = "Encap", + .auth = rbac_encap_auth, +@@ -12230,12 +12243,23 @@ update_northbound_cfg(struct northd_context *ctx, + /* Update northbound hv_cfg if appropriate. */ + if (nbg) { + /* Find minimum nb_cfg among all chassis. 
*/ +- const struct sbrec_chassis *chassis; ++ const struct sbrec_chassis_private *chassis_priv; + int64_t hv_cfg = nbg->nb_cfg; +- SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) { +- if (!smap_get_bool(&chassis->other_config, "is-remote", false) && +- chassis->nb_cfg < hv_cfg) { +- hv_cfg = chassis->nb_cfg; ++ SBREC_CHASSIS_PRIVATE_FOR_EACH (chassis_priv, ctx->ovnsb_idl) { ++ const struct sbrec_chassis *chassis = chassis_priv->chassis; ++ if (chassis) { ++ if (smap_get_bool(&chassis->other_config, ++ "is-remote", false)) { ++ /* Skip remote chassises. */ ++ continue; ++ } ++ } else { ++ VLOG_WARN("Chassis not exist for Chassis_Private record, " ++ "name: %s", chassis_priv->name); ++ } ++ ++ if (chassis_priv->nb_cfg < hv_cfg) { ++ hv_cfg = chassis_priv->nb_cfg; + } + } + +@@ -12248,7 +12272,8 @@ update_northbound_cfg(struct northd_context *ctx, + + /* Handle a fairly small set of changes in the southbound database. */ + static void +-ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop, ++ovnsb_db_run(struct northd_context *ctx, ++ struct ovsdb_idl_loop *sb_loop, + struct hmap *ports) + { + if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) { +@@ -12529,10 +12554,17 @@ main(int argc, char *argv[]) + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_burst_size); + + ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis); +- ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_other_config); + ++ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis_private); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_chassis_private_col_name); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_chassis_private_col_chassis); ++ ovsdb_idl_add_column(ovnsb_idl_loop.idl, ++ &sbrec_chassis_private_col_nb_cfg); ++ + ovsdb_idl_add_table(ovnsb_idl_loop.idl, 
&sbrec_table_ha_chassis); + add_column_noalert(ovnsb_idl_loop.idl, + &sbrec_ha_chassis_col_chassis); +diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema +index 99c5de822..3af76540a 100644 +--- a/ovn-sb.ovsschema ++++ b/ovn-sb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Southbound", +- "version": "2.8.2", +- "cksum": "464326363 21916", ++ "version": "2.9.0", ++ "cksum": "223619766 22548", + "tables": { + "SB_Global": { + "columns": { +@@ -46,6 +46,19 @@ + "max": "unlimited"}}}, + "isRoot": true, + "indexes": [["name"]]}, ++ "Chassis_Private": { ++ "columns": { ++ "name": {"type": "string"}, ++ "chassis": {"type": {"key": {"type": "uuid", ++ "refTable": "Chassis", ++ "refType": "weak"}, ++ "min": 0, "max": 1}}, ++ "nb_cfg": {"type": {"key": "integer"}}, ++ "external_ids": { ++ "type": {"key": "string", "value": "string", ++ "min": 0, "max": "unlimited"}}}, ++ "isRoot": true, ++ "indexes": [["name"]]}, + "Encap": { + "columns": { + "type": {"type": {"key": { +diff --git a/ovn-sb.xml b/ovn-sb.xml +index a74d9c3ea..59b21711b 100644 +--- a/ovn-sb.xml ++++ b/ovn-sb.xml +@@ -256,10 +256,8 @@ + + + +- Sequence number for the configuration. When ovn-controller +- updates the configuration of a chassis from the contents of the +- southbound database, it copies +- from the table into this column. ++ Deprecated. This column is replaced by the column of the table. + + + +@@ -366,6 +364,42 @@ + +
            + ++ ++

            ++ Each row in this table maintains per chassis private data that are ++ accessed only by the owning chassis (write only) and ovn-northd, not by ++ any other chassis. These data are stored in this separate table instead ++ of the table for performance considerations: ++ the rows in this table can be conditionally monitored by chassises so ++ that each chassis only gets update notifications for its own row, to avoid ++ unnecessary chassis private data update flooding in a large scale ++ deployment. ++

            ++ ++ ++ The name of the chassis that owns these chassis-private data. ++ ++ ++ ++ The reference to table for the chassis that owns ++ these chassis-private data. ++ ++ ++ ++ Sequence number for the configuration. When ovn-controller ++ updates the configuration of a chassis from the contents of the ++ southbound database, it copies ++ from the table into this column. ++ ++ ++ ++ The overall purpose of these columns is described under Common ++ Columns at the beginning of this document. ++ ++ ++ ++
            ++ + +

            + The column in the - 2.13.0-39 +* Tue Sep 16 2020 Numan Siddique - 20.06.2-11 +- Backport "ovn-controller: Persist the conjunction ids allocated for conjuctive matches." (#1858878) +- Backport "I-P engine: Provide the option to store client data in engine ctx." (#1858878) + +* Mon Sep 15 2020 Numan Siddique - 20.06.2-10 +- Backport "ovn-northd: Fix multiple ARP replies for SNAT entries configured on a distributed router." (#1878451) + +* Tue Sep 8 2020 Lorenzo Bianconi - 20.06.2-9 +- Backport "ovn-ctl: introduce ovsdb-{n, s}b-wrapper options" (#1831558) + +* Tue Sep 8 2020 Lorenzo Bianconi - 20.06.2-8 +- Backport "Introduce DHCPDECLINE msg support to OVN DHCP server" (#1857563) + +* Mon Sep 7 2020 Lorenzo Bianconi - 20.06.2-7 +- Backport "northd: fix empty_lb_backends controller_event for IPv6" (#1875337) + +* Fri Sep 4 2020 Dumitru Ceara - 20.06.2-6 +- Backport "chassis: Fix the way encaps are updated for a chassis record." (#1873032) +- Backport "chassis: Fix chassis_private record updates when the system-id changes." (#1873032) + +* Fri Sep 4 2020 Dumitru Ceara - 20.06.2-5 +- Backport "lex: Allow unmasked bits in value/mask tokens." (#1812820) +- Backport "ovn-nbctl: Deal with nb_cfg overflows." (#1873455) +- Backport "pinctrl: Fix incorrect warning message for multicast querier." (#1875727) + +* Wed Sep 2 2020 Dumitru Ceara - 20.06.2-4 +- Backport "ovn-northd: Rate limit missing chassis log." (#1874745) + +* Mon Aug 31 2020 Dumitru Ceara - 20.06.2-3 +- Backport "ovn-controller: Fix incremental processing of Port_Binding deletes." (#1871961) + +* Wed Aug 26 2020 Numan Siddique - 20.06.2-2 +- Backport "Fix ovn-controller crash when a lport of type 'virtual' is deleted." (#1872681) + +* Sat Aug 22 2020 Numan Siddique - 20.06.2-1 +- Sync the OVN sources with the upstream v20.06.2 release and reorder + the other patches. 
+ +* Fri Aug 21 2020 Numan Siddique - 20.06.1-18 +- Backport "Avoid nb_cfg update notification flooding" (#1871054) + +* Fri Aug 21 2020 Numan Siddique - 20.06.1-17 +- Backport "Fix the data type for DHCP option tftp_server (66)" (#1871056) +- Backport "Add support for DHCP domain search option (119)" (#1871056) + +* Thu Aug 20 2020 Numan Siddique - 20.06.1-16 +- Backport "ovsdb-idl: Add function to reset min_index." (#1829109) +- Backport "Add northd and ovn-controller cluster status reset commands." (#1829109) +- Backport "Fix compilation error when configured with --enable-sparse." (#1829109) + +* Wed Aug 19 2020 Numan Siddique - 20.06.1-15 +- Backport "northd: Fix the routing for external logical ports of bridged logical switches." (#1829762) + +* Tue Aug 11 2020 Numan Siddique - 20.06.1-14 +- Backport "ovn-northd: Add ARP responder flows for SNAT entries." (#1861294) + +* Mon Aug 10 2020 Numan Siddique - 20.06.1-13 +- Backport "ovn-northd: Don't send the pkt to conntrack if it is to be routed in egress stage." (#1836804) +- Backport "ovn-northd: Don't send the pkt to conntrack for NAT if its not destined for LB VIP." (#1836804) + +* Mon Aug 10 2020 Numan Siddique - 20.06.1-12 +- Backport "Allow force_snat options to work for dual-stack routers." (#1823003) + +* Mon Aug 10 2020 Numan Siddique - 20.06.1-11 +- Synced with the upstream branch-20.06 commit cd31e32f028fa7a06a7faaeeace307c1086e25b4 +- This includes the flow explosion patches. (#1847537) + +* Fri Aug 7 2020 Dumitru Ceara - 20.06.1-10 +- Backport "chassis: Propagate ovn-monitor-all external-id to Chassis:other_config." (#1866820) +- Backport "pinctrl: Avoid flushing of non-local IGMP_Groups." (#1866820) + +* Thu Aug 6 2020 Mark Michelson - 20.06.1-9 +- Backport "Allow bare ct_commits when no nested actions are required." + +* Mon Aug 3 2020 Dumitru Ceara - 20.06.1-8 +- Backport "expr.c: Fix argument type of expr_write_scope()." (#1849683) +- Backport "ovn-detrace: Add support for multiple remotes." 
(#1853716) +- Backport "ovn-detrace: Support SSL remotes." (#1853716) + +* Thu Jul 30 2020 Mark Michelson - 20.06.1-7 +- Backport ECMP symmetric reply patch series (#1849683) + +* Wed Jul 29 2020 Numan Siddique - 20.06.1-6 +- Backport "ovn-controller: Release lport if the ofport of the VIF is -1.". (#1861298) + +* Tue Jul 28 2020 Numan Siddique - 20.06.1-5 +- Backport "ovn-controller: Fix the missing flows when logical router port is added after its peer." (#1860053) +- Backport "ovn-controller: Clear flows not associated with db rows in physical flow change handler." (#1861042) + +* Fri Jul 17 2020 Numan Siddique - 20.06.1-4 +- Backport "ovn-controller: Fix the missing ct zone entries for container ports." (#1858191) + +* Thu Jul 16 2020 Numan Siddique - 20.06.1-3 +- Backport "ovn-controller: Fix the missing flows with monitor-all set to True" (#1857537) + +* Thu Jul 9 2020 Lorenzo Bianconi - 20.06.1-2 +- Backport "Introduce icmp6_error action" (#1846300) +- Backport "Introduce icmp6.frag_mtu action" (#1846300) +- Backport "northd: introduce icmp6_error logical flows in router pipeline" (#1846300) + +* Thu Jul 09 2020 Numan Siddique - 20.06.1-1 +- Backport "Support packet metadata marking for logical router policies." (#1828933) +- Backport "ovn-nbctl: Enhance lr-policy-add to set the options."(#1828933) +- Backport "pinctrl: Support DHCPRELEASE and DHCPINFORM in native OVN dhcp responder." (#1801258) + +* Thu Jul 09 2020 Numan Siddique - 20.06.1-0 +- Synced the upstream v20.06.1 source. +- This resolves the following BZs: +- 1846189 +- 1850511 +- 1818128 +- 1805630 + +* Wed Jul 08 2020 Numan Siddique - 2.13.0-39 - Backport "ovn-northd: Fix the missing lflow issue in LS_OUT_PRE_LB." (#1849162) -* Wed Jul 08 2020 Numan Siddique - 2.13.0-38 +* Wed Jul 08 2020 Numan Siddique - 2.13.0-38 - Backport "Split SB Port_Group per datapath." (#1818128) * Fri Jun 19 2020 Dumitru Ceara - 2.13.0-37