diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5651078 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +SOURCES/*.tar.gz +SRPMS diff --git a/SOURCES/0001-northd-add-reject-action-for-lb-with-no-backends.patch b/SOURCES/0001-northd-add-reject-action-for-lb-with-no-backends.patch new file mode 100644 index 0000000..9991763 --- /dev/null +++ b/SOURCES/0001-northd-add-reject-action-for-lb-with-no-backends.patch @@ -0,0 +1,392 @@ +From 8a9f48c2f146476f1e46da4144b77e38d712673c Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Wed, 9 Dec 2020 12:25:31 +0100 +Subject: [PATCH 1/7] northd: add reject action for lb with no backends + +Introduce the capability to create a load balancer with no backends and +with --reject option in order to send a TCP reset segment (for tcp) or +an ICMP port unreachable packet (for all other kind of traffic) whenever +an incoming packet is received for this load-balancer. + +Tested-by: Antonio Ojea +Signed-off-by: Lorenzo Bianconi +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique + +(cherry-picked from master commit ebbcd8e8cc5000d50691e72edfde7ede4a906ade) + +Change-Id: I5676901df564ecf978455319cfcddd24c2efdae4 +--- + lib/lb.c | 2 ++ + lib/lb.h | 1 + + northd/ovn-northd.8.xml | 19 +++++++++++++++++ + northd/ovn-northd.c | 44 ++++++++++++++++++++++++++------------- + ovn-nb.ovsschema | 9 ++++++-- + ovn-nb.xml | 10 +++++++++ + tests/ovn-northd.at | 25 ++++++++++++++++++++++ + tests/system-ovn.at | 28 ++++++++++++++++++++++++- + utilities/ovn-nbctl.8.xml | 11 +++++++++- + utilities/ovn-nbctl.c | 7 ++++++- + 10 files changed, 136 insertions(+), 20 deletions(-) + +diff --git a/lib/lb.c b/lib/lb.c +index a90042e58..2517c02ef 100644 +--- a/lib/lb.c ++++ b/lib/lb.c +@@ -189,6 +189,8 @@ ovn_northd_lb_create(const struct nbrec_load_balancer *nbrec_lb, + struct ovn_lb_vip *lb_vip = &lb->vips[n_vips]; + struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[n_vips]; + ++ lb_vip->empty_backend_rej = smap_get_bool(&nbrec_lb->options, ++ "reject", false); + if (!ovn_lb_vip_init(lb_vip, node->key, node->value)) { + continue; + } +diff --git a/lib/lb.h b/lib/lb.h +index 6644ad0d8..42c580bd1 100644 +--- a/lib/lb.h ++++ b/lib/lb.h +@@ -49,6 +49,7 @@ struct ovn_lb_vip { + + struct ovn_lb_backend *backends; + size_t n_backends; ++ bool empty_backend_rej; + }; + + struct ovn_lb_backend { +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index a9a3a9f4f..d86f36ea6 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -700,6 +700,16 @@ + ct_lb(args), where args contains comma + separated IP addresses of the same address family as VIP. + ++ ++
  • ++ If the load balancer is created with --reject option and ++ it has no active backends, a TCP reset segment (for tcp) or an ICMP ++ port unreachable packet (for all other kind of traffic) will be sent ++ whenever an incoming packet is received for this load-balancer. ++ Please note using --reject option will disable ++ empty_lb SB controller event for this load balancer. ++
  • ++ +
  • + A priority-100 flow commits packets to connection tracker using + ct_commit; next; action based on a hint provided by +@@ -2592,6 +2602,15 @@ icmp6 { + packets, the above action will be replaced by + flags.force_snat_for_lb = 1; ct_dnat;. +
  • ++ ++
  • ++ If the load balancer is created with --reject option and ++ it has no active backends, a TCP reset segment (for tcp) or an ICMP ++ port unreachable packet (for all other kind of traffic) will be sent ++ whenever an incoming packet is received for this load-balancer. ++ Please note using --reject option will disable ++ empty_lb SB controller event for this load balancer. ++
  • + + +

    Ingress Table 6: DNAT on Gateway Routers

    +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 5a3227568..478f1a339 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -3436,12 +3436,12 @@ ovn_lb_svc_create(struct northd_context *ctx, struct ovn_northd_lb *lb, + } + + static +-void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip, +- struct ovn_northd_lb_vip *lb_vip_nb, +- struct ds *action, +- char *selection_fields) ++void build_lb_vip_actions(struct ovn_lb_vip *lb_vip, ++ struct ovn_northd_lb_vip *lb_vip_nb, ++ struct ds *action, char *selection_fields, ++ bool ls_dp) + { +- bool skip_hash_fields = false; ++ bool skip_hash_fields = false, reject = false; + + if (lb_vip_nb->lb_health_check) { + ds_put_cstr(action, "ct_lb(backends="); +@@ -3463,18 +3463,30 @@ void build_lb_vip_ct_lb_actions(struct ovn_lb_vip *lb_vip, + } + + if (!n_active_backends) { +- skip_hash_fields = true; +- ds_clear(action); +- ds_put_cstr(action, "drop;"); ++ if (!lb_vip->empty_backend_rej) { ++ ds_clear(action); ++ ds_put_cstr(action, "drop;"); ++ skip_hash_fields = true; ++ } else { ++ reject = true; ++ } + } else { + ds_chomp(action, ','); + ds_put_cstr(action, ");"); + } ++ } else if (lb_vip->empty_backend_rej && !lb_vip->n_backends) { ++ reject = true; + } else { + ds_put_format(action, "ct_lb(backends=%s);", lb_vip_nb->backend_ips); + } + +- if (!skip_hash_fields && selection_fields && selection_fields[0]) { ++ if (reject) { ++ int stage = ls_dp ? ovn_stage_get_table(S_SWITCH_OUT_QOS_MARK) ++ : ovn_stage_get_table(S_ROUTER_OUT_SNAT); ++ ds_clear(action); ++ ds_put_format(action, "reg0 = 0; reject { outport <-> inport; " ++ "next(pipeline=egress,table=%d);};", stage); ++ } else if (!skip_hash_fields && selection_fields && selection_fields[0]) { + ds_chomp(action, ';'); + ds_chomp(action, ')'); + ds_put_format(action, "; hash_fields=\"%s\");", selection_fields); +@@ -5084,7 +5096,8 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, + struct nbrec_load_balancer *lb, + int pl, struct shash *meter_groups) + { +- if (!controller_event_en || lb_vip->n_backends) { ++ if (!controller_event_en || lb_vip->n_backends || ++ lb_vip->empty_backend_rej) { + return; + } + +@@ -5974,8 +5987,8 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, + + /* New connections in Ingress table. */ + struct ds action = DS_EMPTY_INITIALIZER; +- build_lb_vip_ct_lb_actions(lb_vip, lb_vip_nb, &action, +- lb->selection_fields); ++ build_lb_vip_actions(lb_vip, lb_vip_nb, &action, ++ lb->selection_fields, true); + + struct ds match = DS_EMPTY_INITIALIZER; + ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, +@@ -9685,8 +9698,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct ovn_lb_vip *lb_vip = &lb->vips[j]; + struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j]; + ds_clear(&actions); +- build_lb_vip_ct_lb_actions(lb_vip, lb_vip_nb, &actions, +- lb->selection_fields); ++ build_lb_vip_actions(lb_vip, lb_vip_nb, &actions, ++ lb->selection_fields, false); + + if (!sset_contains(&all_ips, lb_vip->vip_str)) { + sset_add(&all_ips, lb_vip->vip_str); +@@ -9737,7 +9750,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + prio = 120; + } + +- if (od->l3redirect_port) { ++ if (od->l3redirect_port && ++ (lb_vip->n_backends || !lb_vip->empty_backend_rej)) { + ds_put_format(&match, " && is_chassis_resident(%s)", + od->l3redirect_port->json_key); + } +diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema +index 269e3a888..af77dd138 100644 +--- a/ovn-nb.ovsschema ++++ b/ovn-nb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Northbound", +- "version": "5.28.0", +- "cksum": "610359755 26847", ++ "version": "5.29.0", ++ "cksum": "328602112 27064", + "tables": { + "NB_Global": { + "columns": { +@@ -188,6 +188,11 @@ + ["eth_src", "eth_dst", "ip_src", "ip_dst", + "tp_src", "tp_dst"]]}, + "min": 0, "max": "unlimited"}}, ++ "options": { ++ "type": {"key": "string", ++ "value": "string", ++ "min": 0, ++ "max": "unlimited"}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, +diff --git a/ovn-nb.xml b/ovn-nb.xml +index c9ab25ceb..e7a8d6833 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -1635,6 +1635,16 @@ + See External IDs at the beginning of this document. + + ++ ++ ++ If the load balancer is created with --reject option and ++ it has no active backends, a TCP reset segment (for tcp) or an ICMP ++ port unreachable packet (for all other kind of traffic) will be sent ++ whenever an incoming packet is received for this load-balancer. ++ Please note using --reject option will disable empty_lb ++ SB controller event for this load balancer. ++ ++ + + + +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index 90ca0a4db..50a4cae76 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -1233,6 +1233,31 @@ wait_row_count Service_Monitor 2 + ovn-nbctl --wait=sb lb-del lb2 + wait_row_count Service_Monitor 0 + ++check ovn-nbctl --reject lb-add lb3 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80 ++check ovn-nbctl --wait=sb set load_balancer lb3 ip_port_mappings:10.0.0.3=sw0-p1:10.0.0.2 ++check ovn-nbctl --wait=sb set load_balancer lb3 ip_port_mappings:20.0.0.3=sw1-p1:20.0.0.2 ++wait_row_count Service_Monitor 0 ++ ++check ovn-nbctl --wait=sb ls-lb-add sw0 lb3 ++AT_CHECK([ovn-nbctl --wait=sb -- --id=@hc create \ ++Load_Balancer_Health_Check vip="10.0.0.10\:80" -- add Load_Balancer lb3 \ ++health_check @hc | uuidfilt], [0], [<0> ++]) ++wait_row_count Service_Monitor 2 ++ ++# Set the service monitor for sw0-p1 and sw1-p1 to online ++sm_sw0_p1=$(fetch_column Service_Monitor _uuid logical_port=sw0-p1) ++sm_sw1_p1=$(fetch_column Service_Monitor _uuid logical_port=sw1-p1) ++ ++ovn-sbctl set service_monitor $sm_sw0_p1 status=offline ++ovn-sbctl set service_monitor $sm_sw1_p1 status=offline ++ ++AT_CAPTURE_FILE([sbflows12]) ++OVS_WAIT_FOR_OUTPUT( ++ [ovn-sbctl dump-flows sw0 | tee sbflows12 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" | grep priority=120 | sed 's/table=..//'], [0], [dnl ++ (ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0 = 0; reject { outport <-> inport; next(pipeline=egress,table=6);};) ++]) ++ + AT_CLEANUP + + AT_SETUP([ovn -- Load balancer VIP in NAT entries]) +diff --git a/tests/system-ovn.at b/tests/system-ovn.at +index d59f7c97e..1e73001ab 100644 +--- a/tests/system-ovn.at ++++ b/tests/system-ovn.at +@@ -1574,6 +1574,18 @@ OVS_WAIT_UNTIL([ + grep "selection_method=hash,fields(ip_src,ip_dst,sctp_src,sctp_dst)" -c) -eq 2 + ]) + ++ovn-nbctl --reject lb-add lb3 30.0.0.10:80 "" ++ovn-nbctl ls-lb-add foo lb3 ++# Filter reset segments ++NS_CHECK_EXEC([foo1], [tcpdump -c 1 -neei foo1 ip[[33:1]]=0x14 > rst.pcap &]) ++sleep 1 ++NS_CHECK_EXEC([foo1], [wget -q 30.0.0.10],[4]) ++ ++OVS_WAIT_UNTIL([ ++ n_reset=$(cat rst.pcap | wc -l) ++ test "${n_reset}" = "1" ++]) ++ + OVS_APP_EXIT_AND_WAIT([ovn-controller]) + + as ovn-sb +@@ -4151,7 +4163,7 @@ ovn-nbctl lsp-set-type sw1-lr0 router + ovn-nbctl lsp-set-addresses sw1-lr0 router + ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 + +-ovn-nbctl lb-add lb1 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80 ++ovn-nbctl --reject lb-add lb1 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80 + + ovn-nbctl --wait=sb set load_balancer . ip_port_mappings:10.0.0.3=sw0-p1:10.0.0.2 + ovn-nbctl --wait=sb set load_balancer . ip_port_mappings:20.0.0.3=sw1-p1:20.0.0.2 +@@ -4266,6 +4278,20 @@ ovn-sbctl list service_monitor + OVS_WAIT_UNTIL([test 2 = `ovn-sbctl --bare --columns status find \ + service_monitor protocol=udp | sed '/^$/d' | grep offline | wc -l`]) + ++# Stop webserer in sw1-p1 ++pid_file=$(cat l7_pid_file) ++NS_CHECK_EXEC([sw1-p1], [kill $(cat $pid_file)]) ++ ++NS_CHECK_EXEC([sw0-p2], [tcpdump -c 1 -neei sw0-p2 ip[[33:1]]=0x14 > rst.pcap &]) ++OVS_WAIT_UNTIL([test 2 = `ovn-sbctl --bare --columns status find \ ++service_monitor protocol=tcp | sed '/^$/d' | grep offline | wc -l`]) ++NS_CHECK_EXEC([sw0-p2], [wget 10.0.0.10 -v -o wget$i.log],[4]) ++ ++OVS_WAIT_UNTIL([ ++ n_reset=$(cat rst.pcap | wc -l) ++ test "${n_reset}" = "1" ++]) ++ + OVS_APP_EXIT_AND_WAIT([ovn-controller]) + + as ovn-sb +diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml +index 59302296b..e5a35f307 100644 +--- a/utilities/ovn-nbctl.8.xml ++++ b/utilities/ovn-nbctl.8.xml +@@ -903,7 +903,7 @@ + +

    Load Balancer Commands

    +
    +-
    [--may-exist | --add-duplicate] lb-add lb vip ips [protocol]
    ++
    [--may-exist | --add-duplicate | --reject] lb-add lb vip ips [protocol]
    +
    +

    + Creates a new load balancer named lb with the provided +@@ -936,6 +936,15 @@ + creates a new load balancer with a duplicate name. +

    + ++

    ++ If the load balancer is created with --reject option and ++ it has no active backends, a TCP reset segment (for tcp) or an ICMP ++ port unreachable packet (for all other kind of traffic) will be sent ++ whenever an incoming packet is received for this load-balancer. ++ Please note using --reject option will disable ++ empty_lb SB controller event for this load balancer. ++

    ++ +

    + The following example adds a load balancer. +

    +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index d19e1b6c6..3a95f6b1f 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -2821,6 +2821,7 @@ nbctl_lb_add(struct ctl_context *ctx) + + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; + bool add_duplicate = shash_find(&ctx->options, "--add-duplicate") != NULL; ++ bool empty_backend_rej = shash_find(&ctx->options, "--reject") != NULL; + + const char *lb_proto; + bool is_update_proto = false; +@@ -2934,6 +2935,10 @@ nbctl_lb_add(struct ctl_context *ctx) + smap_add(CONST_CAST(struct smap *, &lb->vips), + lb_vip_normalized, ds_cstr(&lb_ips_new)); + nbrec_load_balancer_set_vips(lb, &lb->vips); ++ if (empty_backend_rej) { ++ const struct smap options = SMAP_CONST1(&options, "reject", "true"); ++ nbrec_load_balancer_set_options(lb, &options); ++ } + out: + ds_destroy(&lb_ips_new); + +@@ -6588,7 +6593,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { + nbctl_lr_nat_set_ext_ips, NULL, "--is-exempted", RW}, + /* load balancer commands. */ + { "lb-add", 3, 4, "LB VIP[:PORT] IP[:PORT]... [PROTOCOL]", NULL, +- nbctl_lb_add, NULL, "--may-exist,--add-duplicate", RW }, ++ nbctl_lb_add, NULL, "--may-exist,--add-duplicate,--reject", RW }, + { "lb-del", 1, 2, "LB [VIP]", NULL, nbctl_lb_del, NULL, + "--if-exists", RW }, + { "lb-list", 0, 1, "[LB]", NULL, nbctl_lb_list, NULL, "", RO }, +-- +2.28.0 + diff --git a/SOURCES/0002-nbctl-Cache-to-which-switch-or-router-particular-por.patch b/SOURCES/0002-nbctl-Cache-to-which-switch-or-router-particular-por.patch new file mode 100644 index 0000000..2e1a12e --- /dev/null +++ b/SOURCES/0002-nbctl-Cache-to-which-switch-or-router-particular-por.patch @@ -0,0 +1,442 @@ +From 6ab1ea8fcd83712ae3e79b96fe9494db92a1d836 Mon Sep 17 00:00:00 2001 +From: Ilya Maximets +Date: Fri, 11 Dec 2020 11:59:15 +0100 +Subject: [PATCH 2/7] nbctl: Cache to which switch or router particular port + belongs. + +nbctl always iterates over all ports in all logical switches or routers +to find to which logical router/switch current port belongs. This +could be optimized by iterating only once and caching the current +state. This should improve a little bit performance of this utility +in case where many updates are passed in a single nbctl command. + +However, this change alone will slightly reduce performance of +standalone commands, since we're iterating twice over ports on port +deletion. + +Cache is required for the upcoming change that will make nbctl to use +partial set updates. It will allow us to drop redundant iterations +over ports, i.e. to not duplicate work. + +Acked-by: Dumitru Ceara +Signed-off-by: Ilya Maximets +Signed-off-by: Numan Siddique +--- + utilities/ovn-nbctl.c | 209 +++++++++++++++++++++++++++++------------- + 1 file changed, 146 insertions(+), 63 deletions(-) + +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 3a95f6b1f..cbf2cb1f1 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -125,6 +125,61 @@ static char * OVS_WARN_UNUSED_RESULT main_loop(const char *args, + const struct timer *); + static void server_loop(struct ovsdb_idl *idl, int argc, char *argv[]); + ++/* A context for keeping track of which switch/router certain ports are ++ * connected to. */ ++struct nbctl_context { ++ struct ctl_context base; ++ struct shash lsp_to_ls_map; ++ struct shash lrp_to_lr_map; ++ bool context_valid; ++}; ++ ++static void ++nbctl_context_init(struct nbctl_context *nbctx) ++{ ++ nbctx->context_valid = false; ++ shash_init(&nbctx->lsp_to_ls_map); ++ shash_init(&nbctx->lrp_to_lr_map); ++} ++ ++static void ++nbctl_context_destroy(struct nbctl_context *nbctx) ++{ ++ nbctx->context_valid = false; ++ shash_destroy(&nbctx->lsp_to_ls_map); ++ shash_destroy(&nbctx->lrp_to_lr_map); ++} ++ ++/* Casts 'base' into 'struct nbctl_context' and initializes it if needed. */ ++static struct nbctl_context * ++nbctl_context_get(struct ctl_context *base) ++{ ++ struct nbctl_context *nbctx; ++ ++ nbctx = CONTAINER_OF(base, struct nbctl_context, base); ++ ++ if (nbctx->context_valid) { ++ return nbctx; ++ } ++ ++ const struct nbrec_logical_switch *ls; ++ NBREC_LOGICAL_SWITCH_FOR_EACH (ls, base->idl) { ++ for (size_t i = 0; i < ls->n_ports; i++) { ++ shash_add_once(&nbctx->lsp_to_ls_map, ls->ports[i]->name, ls); ++ } ++ } ++ ++ const struct nbrec_logical_router *lr; ++ NBREC_LOGICAL_ROUTER_FOR_EACH (lr, base->idl) { ++ for (size_t i = 0; i < lr->n_ports; i++) { ++ shash_add_once(&nbctx->lrp_to_lr_map, lr->ports[i]->name, lr); ++ } ++ } ++ ++ nbctx->context_valid = true; ++ return nbctx; ++} ++ + int + main(int argc, char *argv[]) + { +@@ -1249,6 +1304,7 @@ static void + nbctl_ls_del(struct ctl_context *ctx) + { + bool must_exist = !shash_find(&ctx->options, "--if-exists"); ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const char *id = ctx->argv[1]; + const struct nbrec_logical_switch *ls = NULL; + +@@ -1261,6 +1317,11 @@ nbctl_ls_del(struct ctl_context *ctx) + return; + } + ++ /* Updating runtime cache. */ ++ for (size_t i = 0; i < ls->n_ports; i++) { ++ shash_find_and_delete(&nbctx->lsp_to_ls_map, ls->ports[i]->name); ++ } ++ + nbrec_logical_switch_delete(ls); + } + +@@ -1317,22 +1378,19 @@ lsp_by_name_or_uuid(struct ctl_context *ctx, const char *id, + + /* Returns the logical switch that contains 'lsp'. */ + static char * OVS_WARN_UNUSED_RESULT +-lsp_to_ls(const struct ovsdb_idl *idl, ++lsp_to_ls(struct ctl_context *ctx, + const struct nbrec_logical_switch_port *lsp, + const struct nbrec_logical_switch **ls_p) + { ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const struct nbrec_logical_switch *ls; + *ls_p = NULL; + +- NBREC_LOGICAL_SWITCH_FOR_EACH (ls, idl) { +- for (size_t i = 0; i < ls->n_ports; i++) { +- if (ls->ports[i] == lsp) { +- *ls_p = ls; +- return NULL; +- } +- } ++ ls = shash_find_data(&nbctx->lsp_to_ls_map, lsp->name); ++ if (ls) { ++ *ls_p = ls; ++ return NULL; + } +- + /* Can't happen because of the database schema */ + return xasprintf("logical port %s is not part of any logical switch", + lsp->name); +@@ -1353,6 +1411,7 @@ static void + nbctl_lsp_add(struct ctl_context *ctx) + { + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + + const struct nbrec_logical_switch *ls = NULL; + char *error = ls_by_name_or_uuid(ctx, ctx->argv[1], true, &ls); +@@ -1395,7 +1454,7 @@ nbctl_lsp_add(struct ctl_context *ctx) + } + + const struct nbrec_logical_switch *lsw; +- error = lsp_to_ls(ctx->idl, lsp, &lsw); ++ error = lsp_to_ls(ctx, lsp, &lsw); + if (error) { + ctx->error = error; + return; +@@ -1456,13 +1515,21 @@ nbctl_lsp_add(struct ctl_context *ctx) + lsp); + nbrec_logical_switch_set_ports(ls, new_ports, ls->n_ports + 1); + free(new_ports); ++ ++ /* Updating runtime cache. */ ++ shash_add(&nbctx->lsp_to_ls_map, lsp_name, ls); + } + + /* Removes logical switch port 'ls->ports[idx]'. */ + static void +-remove_lsp(const struct nbrec_logical_switch *ls, size_t idx) ++remove_lsp(struct ctl_context *ctx, size_t idx, ++ const struct nbrec_logical_switch *ls, ++ const struct nbrec_logical_switch_port *lsp) + { +- const struct nbrec_logical_switch_port *lsp = ls->ports[idx]; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); ++ ++ /* Updating runtime cache. */ ++ shash_find_and_delete(&nbctx->lsp_to_ls_map, lsp->name); + + /* First remove 'lsp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is +@@ -1498,18 +1565,18 @@ nbctl_lsp_del(struct ctl_context *ctx) + + /* Find the switch that contains 'lsp', then delete it. */ + const struct nbrec_logical_switch *ls; +- NBREC_LOGICAL_SWITCH_FOR_EACH (ls, ctx->idl) { +- for (size_t i = 0; i < ls->n_ports; i++) { +- if (ls->ports[i] == lsp) { +- remove_lsp(ls, i); +- return; +- } ++ ++ error = lsp_to_ls(ctx, lsp, &ls); ++ if (error) { ++ ctx->error = error; ++ return; ++ } ++ for (size_t i = 0; i < ls->n_ports; i++) { ++ if (ls->ports[i] == lsp) { ++ remove_lsp(ctx, i, ls, lsp); ++ break; + } + } +- +- /* Can't happen because of the database schema. */ +- ctl_error(ctx, "logical port %s is not part of any logical switch", +- ctx->argv[1]); + } + + static void +@@ -1658,7 +1725,7 @@ nbctl_lsp_set_addresses(struct ctl_context *ctx) + } + + const struct nbrec_logical_switch *ls; +- error = lsp_to_ls(ctx->idl, lsp, &ls); ++ error = lsp_to_ls(ctx, lsp, &ls); + if (error) { + ctx->error = error; + return; +@@ -3383,6 +3450,7 @@ static void + nbctl_lr_del(struct ctl_context *ctx) + { + bool must_exist = !shash_find(&ctx->options, "--if-exists"); ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const char *id = ctx->argv[1]; + const struct nbrec_logical_router *lr = NULL; + +@@ -3395,6 +3463,11 @@ nbctl_lr_del(struct ctl_context *ctx) + return; + } + ++ /* Updating runtime cache. */ ++ for (size_t i = 0; i < lr->n_ports; i++) { ++ shash_find_and_delete(&nbctx->lrp_to_lr_map, lr->ports[i]->name); ++ } ++ + nbrec_logical_router_delete(lr); + } + +@@ -4672,20 +4745,18 @@ lrp_by_name_or_uuid(struct ctl_context *ctx, const char *id, bool must_exist, + + /* Returns the logical router that contains 'lrp'. */ + static char * OVS_WARN_UNUSED_RESULT +-lrp_to_lr(const struct ovsdb_idl *idl, ++lrp_to_lr(struct ctl_context *ctx, + const struct nbrec_logical_router_port *lrp, + const struct nbrec_logical_router **lr_p) + { ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + const struct nbrec_logical_router *lr; + *lr_p = NULL; + +- NBREC_LOGICAL_ROUTER_FOR_EACH (lr, idl) { +- for (size_t i = 0; i < lr->n_ports; i++) { +- if (lr->ports[i] == lrp) { +- *lr_p = lr; +- return NULL; +- } +- } ++ lr = shash_find_data(&nbctx->lrp_to_lr_map, lrp->name); ++ if (lr) { ++ *lr_p = lr; ++ return NULL; + } + + /* Can't happen because of the database schema */ +@@ -4898,6 +4969,7 @@ static void + nbctl_lrp_add(struct ctl_context *ctx) + { + bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); + + const struct nbrec_logical_router *lr = NULL; + char *error = lr_by_name_or_uuid(ctx, ctx->argv[1], true, &lr); +@@ -4947,7 +5019,7 @@ nbctl_lrp_add(struct ctl_context *ctx) + } + + const struct nbrec_logical_router *bound_lr; +- error = lrp_to_lr(ctx->idl, lrp, &bound_lr); ++ error = lrp_to_lr(ctx, lrp, &bound_lr); + if (error) { + ctx->error = error; + return; +@@ -5053,13 +5125,21 @@ nbctl_lrp_add(struct ctl_context *ctx) + lrp); + nbrec_logical_router_set_ports(lr, new_ports, lr->n_ports + 1); + free(new_ports); ++ ++ /* Updating runtime cache. */ ++ shash_add(&nbctx->lrp_to_lr_map, lrp->name, lr); + } + + /* Removes logical router port 'lr->ports[idx]'. */ + static void +-remove_lrp(const struct nbrec_logical_router *lr, size_t idx) ++remove_lrp(struct ctl_context *ctx, size_t idx, ++ const struct nbrec_logical_router *lr, ++ const struct nbrec_logical_router_port *lrp) + { +- const struct nbrec_logical_router_port *lrp = lr->ports[idx]; ++ struct nbctl_context *nbctx = nbctl_context_get(ctx); ++ ++ /* Updating runtime cache. */ ++ shash_find_and_delete(&nbctx->lrp_to_lr_map, lrp->name); + + /* First remove 'lrp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is +@@ -5095,18 +5175,18 @@ nbctl_lrp_del(struct ctl_context *ctx) + + /* Find the router that contains 'lrp', then delete it. */ + const struct nbrec_logical_router *lr; +- NBREC_LOGICAL_ROUTER_FOR_EACH (lr, ctx->idl) { +- for (size_t i = 0; i < lr->n_ports; i++) { +- if (lr->ports[i] == lrp) { +- remove_lrp(lr, i); +- return; +- } ++ ++ error = lrp_to_lr(ctx, lrp, &lr); ++ if (error) { ++ ctx->error = error; ++ return; ++ } ++ for (size_t i = 0; i < lr->n_ports; i++) { ++ if (lr->ports[i] == lrp) { ++ remove_lrp(ctx, i, lr, lrp); ++ break; + } + } +- +- /* Can't happen because of the database schema. */ +- ctl_error(ctx, "logical port %s is not part of any logical router", +- ctx->argv[1]); + } + + /* Print a list of logical router ports. */ +@@ -5280,7 +5360,7 @@ fwd_group_to_logical_switch(struct ctl_context *ctx, + } + + const struct nbrec_logical_switch *ls; +- error = lsp_to_ls(ctx->idl, lsp, &ls); ++ error = lsp_to_ls(ctx, lsp, &ls); + if (error) { + ctx->error = error; + return NULL; +@@ -5355,7 +5435,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) + return; + } + if (lsp) { +- error = lsp_to_ls(ctx->idl, lsp, &ls); ++ error = lsp_to_ls(ctx, lsp, &ls); + if (error) { + ctx->error = error; + return; +@@ -6236,7 +6316,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + struct ovsdb_idl_txn *txn; + enum ovsdb_idl_txn_status status; + struct ovsdb_symbol_table *symtab; +- struct ctl_context ctx; ++ struct nbctl_context ctx; + struct ctl_command *c; + struct shash_node *node; + int64_t next_cfg = 0; +@@ -6273,25 +6353,26 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + ds_init(&c->output); + c->table = NULL; + } +- ctl_context_init(&ctx, NULL, idl, txn, symtab, NULL); ++ nbctl_context_init(&ctx); ++ ctl_context_init(&ctx.base, NULL, idl, txn, symtab, NULL); + for (c = commands; c < &commands[n_commands]; c++) { +- ctl_context_init_command(&ctx, c); ++ ctl_context_init_command(&ctx.base, c); + if (c->syntax->run) { +- (c->syntax->run)(&ctx); ++ (c->syntax->run)(&ctx.base); + } +- if (ctx.error) { +- error = xstrdup(ctx.error); +- ctl_context_done(&ctx, c); ++ if (ctx.base.error) { ++ error = xstrdup(ctx.base.error); ++ ctl_context_done(&ctx.base, c); + goto out_error; + } +- ctl_context_done_command(&ctx, c); ++ ctl_context_done_command(&ctx.base, c); + +- if (ctx.try_again) { +- ctl_context_done(&ctx, NULL); ++ if (ctx.base.try_again) { ++ ctl_context_done(&ctx.base, NULL); + goto try_again; + } + } +- ctl_context_done(&ctx, NULL); ++ ctl_context_done(&ctx.base, NULL); + + SHASH_FOR_EACH (node, &symtab->sh) { + struct ovsdb_symbol *symbol = node->data; +@@ -6322,14 +6403,14 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + if (status == TXN_UNCHANGED || status == TXN_SUCCESS) { + for (c = commands; c < &commands[n_commands]; c++) { + if (c->syntax->postprocess) { +- ctl_context_init(&ctx, c, idl, txn, symtab, NULL); +- (c->syntax->postprocess)(&ctx); +- if (ctx.error) { +- error = xstrdup(ctx.error); +- ctl_context_done(&ctx, c); ++ ctl_context_init(&ctx.base, c, idl, txn, symtab, NULL); ++ (c->syntax->postprocess)(&ctx.base); ++ if (ctx.base.error) { ++ error = xstrdup(ctx.base.error); ++ ctl_context_done(&ctx.base, c); + goto out_error; + } +- ctl_context_done(&ctx, c); ++ ctl_context_done(&ctx.base, c); + } + } + } +@@ -6417,6 +6498,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands, + done: ; + } + ++ nbctl_context_destroy(&ctx); + ovsdb_symbol_table_destroy(symtab); + ovsdb_idl_txn_destroy(txn); + the_idl_txn = NULL; +@@ -6434,6 +6516,7 @@ out_error: + ovsdb_idl_txn_destroy(txn); + the_idl_txn = NULL; + ++ nbctl_context_destroy(&ctx); + ovsdb_symbol_table_destroy(symtab); + return error; + } +-- +2.28.0 + diff --git a/SOURCES/0003-nbctl-Use-partial-set-updates-instead-of-re-setting-.patch b/SOURCES/0003-nbctl-Use-partial-set-updates-instead-of-re-setting-.patch new file mode 100644 index 0000000..caf0772 --- /dev/null +++ b/SOURCES/0003-nbctl-Use-partial-set-updates-instead-of-re-setting-.patch @@ -0,0 +1,772 @@ +From 5b1c8c21d5620bf3c7e8d3899fe7dbdd1c65b10e Mon Sep 17 00:00:00 2001 +From: Ilya Maximets +Date: Fri, 11 Dec 2020 11:59:16 +0100 +Subject: [PATCH 3/7] nbctl: Use partial set updates instead of re-setting the + whole column. + +Northbound database has many tables that could contain columns with +very large sets. For example 'ports' column of the 'Logical_Switch' +table could contain thousands of ports in a set. + +Current strategy of nbctl while adding a new port to the set is to +copy all existing ports, add one and set the new set of ports. +Similar behavior is for deletion too. + +If we have 1000 ports and want to add one more, resulted transaction +in current code will look like this: + + {transact, + < wait operation > + < create new lsp in Logical_Switch_Port table > + + where: _uuid == 'logical switch row uuid' + op : update + rows : ports ['< list of current 1000 ports + 1 new >'] + } + +The code was written before support for partial updates for sets was +implemented. Now we have it and can replace the old strategy. +With this change resulted transaction will be: + + {transact, + < wait operation > + < create new lsp in Logical_Switch_Port table > + + where: _uuid == 'logical switch row uuid' + op : mutate + mutations : ports insert ['< 1 uuid of a new lsp >'] + } + +Unfortunately, for now, this only decreases transaction size in half, +because '', that is still in the transaction, contains +the full current list of ports: + + where: _uuid == 'logical switch row uuid' + op : wait + until: == + rows : ports ['< list of current 1000 ports >'] + +But anyway, beside the overall code cleanup, this reduces transaction +size in half and should reduce pressure on the ovsdb-server since it +will not need to parse and process all 1000 ports twice. + +This change doesn't affect 'append_request' messages within the raft +cluster, because ovsdb-server is not smart enough to use mutations +there, and this will also not affect 'update' messages from the +ovsdb-server to its clients, because it is smart enough to construct +'modify' updates regardless of the original transaction. + +One difference between full updates and partial is that partial +changes are not visible for the idl client until transaction is +committed and the update received from the server. New switch ports +are not visible while iterating over 'ports' of the logical switch and +removed ports are still there. For this reason, we have to maintain +runtime cache of the mapping between ports and routers/switches they +attached to. + +Acked-by: Mark Michelson +Signed-off-by: Ilya Maximets +Signed-off-by: Numan Siddique +--- + utilities/ovn-nbctl.c | 350 +++++++++++------------------------------- + 1 file changed, 87 insertions(+), 263 deletions(-) + +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index cbf2cb1f1..7c4dce12a 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -126,7 +126,11 @@ static char * OVS_WARN_UNUSED_RESULT main_loop(const char *args, + static void server_loop(struct ovsdb_idl *idl, int argc, char *argv[]); + + /* A context for keeping track of which switch/router certain ports are +- * connected to. */ ++ * connected to. ++ * ++ * It is required to track changes that we did within current set of commands ++ * because partial updates of sets in database are not reflected in the idl ++ * until transaction is committed and updates received from the server. */ + struct nbctl_context { + struct ctl_context base; + struct shash lsp_to_ls_map; +@@ -1508,21 +1512,15 @@ nbctl_lsp_add(struct ctl_context *ctx) + + /* Insert the logical port into the logical switch. */ + nbrec_logical_switch_verify_ports(ls); +- struct nbrec_logical_switch_port **new_ports = xmalloc(sizeof *new_ports * +- (ls->n_ports + 1)); +- nullable_memcpy(new_ports, ls->ports, sizeof *new_ports * ls->n_ports); +- new_ports[ls->n_ports] = CONST_CAST(struct nbrec_logical_switch_port *, +- lsp); +- nbrec_logical_switch_set_ports(ls, new_ports, ls->n_ports + 1); +- free(new_ports); ++ nbrec_logical_switch_update_ports_addvalue(ls, lsp); + + /* Updating runtime cache. */ + shash_add(&nbctx->lsp_to_ls_map, lsp_name, ls); + } + +-/* Removes logical switch port 'ls->ports[idx]'. */ ++/* Removes logical switch port 'lsp' from the logical switch 'ls'. */ + static void +-remove_lsp(struct ctl_context *ctx, size_t idx, ++remove_lsp(struct ctl_context *ctx, + const struct nbrec_logical_switch *ls, + const struct nbrec_logical_switch_port *lsp) + { +@@ -1534,12 +1532,8 @@ remove_lsp(struct ctl_context *ctx, size_t idx, + /* First remove 'lsp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is + * sent to the database server (due to garbage collection). */ +- struct nbrec_logical_switch_port **new_ports +- = xmemdup(ls->ports, sizeof *new_ports * ls->n_ports); +- new_ports[idx] = new_ports[ls->n_ports - 1]; + nbrec_logical_switch_verify_ports(ls); +- nbrec_logical_switch_set_ports(ls, new_ports, ls->n_ports - 1); +- free(new_ports); ++ nbrec_logical_switch_update_ports_delvalue(ls, lsp); + + /* Delete 'lsp' from the IDL. This won't have a real effect on the + * database server (the IDL will suppress it in fact) but it means that it +@@ -1571,12 +1565,7 @@ nbctl_lsp_del(struct ctl_context *ctx) + ctx->error = error; + return; + } +- for (size_t i = 0; i < ls->n_ports; i++) { +- if (ls->ports[i] == lsp) { +- remove_lsp(ctx, i, ls, lsp); +- break; +- } +- } ++ remove_lsp(ctx, ls, lsp); + } + + static void +@@ -2366,17 +2355,13 @@ nbctl_acl_add(struct ctl_context *ctx) + } + + /* Insert the acl into the logical switch/port group. */ +- struct nbrec_acl **new_acls = xmalloc(sizeof *new_acls * (n_acls + 1)); +- nullable_memcpy(new_acls, acls, sizeof *new_acls * n_acls); +- new_acls[n_acls] = acl; + if (pg) { + nbrec_port_group_verify_acls(pg); +- nbrec_port_group_set_acls(pg, new_acls, n_acls + 1); ++ nbrec_port_group_update_acls_addvalue(pg, acl); + } else { + nbrec_logical_switch_verify_acls(ls); +- nbrec_logical_switch_set_acls(ls, new_acls, n_acls + 1); ++ nbrec_logical_switch_update_acls_addvalue(ls, acl); + } +- free(new_acls); + } + + static void +@@ -2416,23 +2401,21 @@ nbctl_acl_del(struct ctl_context *ctx) + /* If priority and match are not specified, delete all ACLs with the + * specified direction. */ + if (ctx->argc == 3) { +- struct nbrec_acl **new_acls = xmalloc(sizeof *new_acls * n_acls); +- +- int n_new_acls = 0; +- for (size_t i = 0; i < n_acls; i++) { +- if (strcmp(direction, acls[i]->direction)) { +- new_acls[n_new_acls++] = acls[i]; +- } +- } +- + if (pg) { + nbrec_port_group_verify_acls(pg); +- nbrec_port_group_set_acls(pg, new_acls, n_new_acls); + } else { + nbrec_logical_switch_verify_acls(ls); +- nbrec_logical_switch_set_acls(ls, new_acls, n_new_acls); + } +- free(new_acls); ++ ++ for (size_t i = 0; i < n_acls; i++) { ++ if (!strcmp(direction, acls[i]->direction)) { ++ if (pg) { ++ nbrec_port_group_update_acls_delvalue(pg, acls[i]); ++ } else { ++ nbrec_logical_switch_update_acls_delvalue(ls, acls[i]); ++ } ++ } ++ } + return; + } + +@@ -2454,19 +2437,13 @@ nbctl_acl_del(struct ctl_context *ctx) + + if (priority == acl->priority && !strcmp(ctx->argv[4], acl->match) && + !strcmp(direction, acl->direction)) { +- struct nbrec_acl **new_acls +- = xmemdup(acls, sizeof *new_acls * n_acls); +- new_acls[i] = acls[n_acls - 1]; + if (pg) { + nbrec_port_group_verify_acls(pg); +- nbrec_port_group_set_acls(pg, new_acls, +- n_acls - 1); ++ nbrec_port_group_update_acls_delvalue(pg, acl); + } else { + nbrec_logical_switch_verify_acls(ls); +- nbrec_logical_switch_set_acls(ls, new_acls, +- n_acls - 1); ++ nbrec_logical_switch_update_acls_delvalue(ls, acl); + } +- free(new_acls); + return; + } + } +@@ -2620,14 +2597,7 @@ nbctl_qos_add(struct ctl_context *ctx) + + /* Insert the qos rule the logical switch. */ + nbrec_logical_switch_verify_qos_rules(ls); +- struct nbrec_qos **new_qos_rules +- = xmalloc(sizeof *new_qos_rules * (ls->n_qos_rules + 1)); +- nullable_memcpy(new_qos_rules, +- ls->qos_rules, sizeof *new_qos_rules * ls->n_qos_rules); +- new_qos_rules[ls->n_qos_rules] = qos; +- nbrec_logical_switch_set_qos_rules(ls, new_qos_rules, +- ls->n_qos_rules + 1); +- free(new_qos_rules); ++ nbrec_logical_switch_update_qos_rules_addvalue(ls, qos); + } + + static void +@@ -2664,34 +2634,32 @@ nbctl_qos_del(struct ctl_context *ctx) + /* If uuid was specified, delete qos_rule with the + * specified uuid. */ + if (ctx->argc == 3) { +- struct nbrec_qos **new_qos_rules +- = xmalloc(sizeof *new_qos_rules * ls->n_qos_rules); ++ size_t i; + +- int n_qos_rules = 0; ++ nbrec_logical_switch_verify_qos_rules(ls); + if (qos_rule_uuid) { +- for (size_t i = 0; i < ls->n_qos_rules; i++) { +- if (!uuid_equals(qos_rule_uuid, +- &(ls->qos_rules[i]->header_.uuid))) { +- new_qos_rules[n_qos_rules++] = ls->qos_rules[i]; ++ for (i = 0; i < ls->n_qos_rules; i++) { ++ if (uuid_equals(qos_rule_uuid, ++ &(ls->qos_rules[i]->header_.uuid))) { ++ nbrec_logical_switch_update_qos_rules_delvalue( ++ ls, ls->qos_rules[i]); ++ break; + } + } +- if (n_qos_rules == ls->n_qos_rules) { ++ if (i == ls->n_qos_rules) { + ctl_error(ctx, "uuid is not found"); + } + + /* If priority and match are not specified, delete all qos_rules + * with the specified direction. */ + } else { +- for (size_t i = 0; i < ls->n_qos_rules; i++) { +- if (strcmp(direction, ls->qos_rules[i]->direction)) { +- new_qos_rules[n_qos_rules++] = ls->qos_rules[i]; ++ for (i = 0; i < ls->n_qos_rules; i++) { ++ if (!strcmp(direction, ls->qos_rules[i]->direction)) { ++ nbrec_logical_switch_update_qos_rules_delvalue( ++ ls, ls->qos_rules[i]); + } + } + } +- +- nbrec_logical_switch_verify_qos_rules(ls); +- nbrec_logical_switch_set_qos_rules(ls, new_qos_rules, n_qos_rules); +- free(new_qos_rules); + return; + } + +@@ -2718,14 +2686,8 @@ nbctl_qos_del(struct ctl_context *ctx) + + if (priority == qos->priority && !strcmp(ctx->argv[4], qos->match) && + !strcmp(direction, qos->direction)) { +- struct nbrec_qos **new_qos_rules +- = xmemdup(ls->qos_rules, +- sizeof *new_qos_rules * ls->n_qos_rules); +- new_qos_rules[i] = ls->qos_rules[ls->n_qos_rules - 1]; + nbrec_logical_switch_verify_qos_rules(ls); +- nbrec_logical_switch_set_qos_rules(ls, new_qos_rules, +- ls->n_qos_rules - 1); +- free(new_qos_rules); ++ nbrec_logical_switch_update_qos_rules_delvalue(ls, qos); + return; + } + } +@@ -3188,16 +3150,7 @@ nbctl_lr_lb_add(struct ctl_context *ctx) + + /* Insert the load balancer into the logical router. */ + nbrec_logical_router_verify_load_balancer(lr); +- struct nbrec_load_balancer **new_lbs +- = xmalloc(sizeof *new_lbs * (lr->n_load_balancer + 1)); +- +- nullable_memcpy(new_lbs, lr->load_balancer, +- sizeof *new_lbs * lr->n_load_balancer); +- new_lbs[lr->n_load_balancer] = CONST_CAST(struct nbrec_load_balancer *, +- new_lb); +- nbrec_logical_router_set_load_balancer(lr, new_lbs, +- lr->n_load_balancer + 1); +- free(new_lbs); ++ nbrec_logical_router_update_load_balancer_addvalue(lr, new_lb); + } + + static void +@@ -3231,14 +3184,7 @@ nbctl_lr_lb_del(struct ctl_context *ctx) + if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { + /* Remove the matching rule. */ + nbrec_logical_router_verify_load_balancer(lr); +- +- struct nbrec_load_balancer **new_lbs +- = xmemdup(lr->load_balancer, +- sizeof *new_lbs * lr->n_load_balancer); +- new_lbs[i] = lr->load_balancer[lr->n_load_balancer - 1]; +- nbrec_logical_router_set_load_balancer(lr, new_lbs, +- lr->n_load_balancer - 1); +- free(new_lbs); ++ nbrec_logical_router_update_load_balancer_delvalue(lr, lb); + return; + } + } +@@ -3313,16 +3259,7 @@ nbctl_ls_lb_add(struct ctl_context *ctx) + + /* Insert the load balancer into the logical switch. */ + nbrec_logical_switch_verify_load_balancer(ls); +- struct nbrec_load_balancer **new_lbs +- = xmalloc(sizeof *new_lbs * (ls->n_load_balancer + 1)); +- +- nullable_memcpy(new_lbs, ls->load_balancer, +- sizeof *new_lbs * ls->n_load_balancer); +- new_lbs[ls->n_load_balancer] = CONST_CAST(struct nbrec_load_balancer *, +- new_lb); +- nbrec_logical_switch_set_load_balancer(ls, new_lbs, +- ls->n_load_balancer + 1); +- free(new_lbs); ++ nbrec_logical_switch_update_load_balancer_addvalue(ls, new_lb); + } + + static void +@@ -3356,14 +3293,7 @@ nbctl_ls_lb_del(struct ctl_context *ctx) + if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { + /* Remove the matching rule. */ + nbrec_logical_switch_verify_load_balancer(ls); +- +- struct nbrec_load_balancer **new_lbs +- = xmemdup(ls->load_balancer, +- sizeof *new_lbs * ls->n_load_balancer); +- new_lbs[i] = ls->load_balancer[ls->n_load_balancer - 1]; +- nbrec_logical_switch_set_load_balancer(ls, new_lbs, +- ls->n_load_balancer - 1); +- free(new_lbs); ++ nbrec_logical_switch_update_load_balancer_delvalue(ls, lb); + return; + } + } +@@ -3792,14 +3722,7 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + smap_destroy(&options); + + nbrec_logical_router_verify_policies(lr); +- struct nbrec_logical_router_policy **new_policies +- = xmalloc(sizeof *new_policies * (lr->n_policies + 1)); +- memcpy(new_policies, lr->policies, +- sizeof *new_policies * lr->n_policies); +- new_policies[lr->n_policies] = policy; +- nbrec_logical_router_set_policies(lr, new_policies, +- lr->n_policies + 1); +- free(new_policies); ++ nbrec_logical_router_update_policies_addvalue(lr, policy); + if (next_hop != NULL) { + free(next_hop); + } +@@ -3836,38 +3759,35 @@ nbctl_lr_policy_del(struct ctl_context *ctx) + /* If uuid was specified, delete routing policy with the + * specified uuid. */ + if (ctx->argc == 3) { +- struct nbrec_logical_router_policy **new_policies +- = xmemdup(lr->policies, +- sizeof *new_policies * lr->n_policies); +- int n_policies = 0; ++ size_t i; + ++ nbrec_logical_router_verify_policies(lr); + if (lr_policy_uuid) { +- for (size_t i = 0; i < lr->n_policies; i++) { +- if (!uuid_equals(lr_policy_uuid, +- &(lr->policies[i]->header_.uuid))) { +- new_policies[n_policies++] = lr->policies[i]; ++ for (i = 0; i < lr->n_policies; i++) { ++ if (uuid_equals(lr_policy_uuid, ++ &(lr->policies[i]->header_.uuid))) { ++ nbrec_logical_router_update_policies_delvalue( ++ lr, lr->policies[i]); ++ break; + } + } +- if (n_policies == lr->n_policies) { ++ if (i == lr->n_policies) { + if (!shash_find(&ctx->options, "--if-exists")) { + ctl_error(ctx, "Logical router policy uuid is not found."); + } +- free(new_policies); + return; + } + +- /* If match is not specified, delete all routing policies with the +- * specified priority. */ ++ /* If match is not specified, delete all routing policies with the ++ * specified priority. */ + } else { +- for (int i = 0; i < lr->n_policies; i++) { +- if (priority != lr->policies[i]->priority) { +- new_policies[n_policies++] = lr->policies[i]; ++ for (i = 0; i < lr->n_policies; i++) { ++ if (priority == lr->policies[i]->priority) { ++ nbrec_logical_router_update_policies_delvalue( ++ lr, lr->policies[i]); + } + } + } +- nbrec_logical_router_verify_policies(lr); +- nbrec_logical_router_set_policies(lr, new_policies, n_policies); +- free(new_policies); + return; + } + +@@ -3876,14 +3796,8 @@ nbctl_lr_policy_del(struct ctl_context *ctx) + struct nbrec_logical_router_policy *routing_policy = lr->policies[i]; + if (priority == routing_policy->priority && + !strcmp(ctx->argv[3], routing_policy->match)) { +- struct nbrec_logical_router_policy **new_policies +- = xmemdup(lr->policies, +- sizeof *new_policies * lr->n_policies); +- new_policies[i] = lr->policies[lr->n_policies - 1]; + nbrec_logical_router_verify_policies(lr); +- nbrec_logical_router_set_policies(lr, new_policies, +- lr->n_policies - 1); +- free(new_policies); ++ nbrec_logical_router_update_policies_delvalue(lr, routing_policy); + return; + } + } +@@ -4083,14 +3997,7 @@ nbctl_lr_route_add(struct ctl_context *ctx) + } + + nbrec_logical_router_verify_static_routes(lr); +- struct nbrec_logical_router_static_route **new_routes +- = xmalloc(sizeof *new_routes * (lr->n_static_routes + 1)); +- nullable_memcpy(new_routes, lr->static_routes, +- sizeof *new_routes * lr->n_static_routes); +- new_routes[lr->n_static_routes] = route; +- nbrec_logical_router_set_static_routes(lr, new_routes, +- lr->n_static_routes + 1); +- free(new_routes); ++ nbrec_logical_router_update_static_routes_addvalue(lr, route); + + cleanup: + free(next_hop); +@@ -4147,11 +4054,9 @@ nbctl_lr_route_del(struct ctl_context *ctx) + output_port = ctx->argv[4]; + } + +- struct nbrec_logical_router_static_route **new_routes +- = xmemdup(lr->static_routes, +- sizeof *new_routes * lr->n_static_routes); +- size_t n_new = 0; +- for (int i = 0; i < lr->n_static_routes; i++) { ++ size_t n_removed = 0; ++ nbrec_logical_router_verify_static_routes(lr); ++ for (size_t i = 0; i < lr->n_static_routes; i++) { + /* Compare route policy, if specified. */ + if (policy) { + char *nb_policy = lr->static_routes[i]->policy; +@@ -4160,7 +4065,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) + nb_is_src_route = true; + } + if (is_src_route != nb_is_src_route) { +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + } +@@ -4171,14 +4075,12 @@ nbctl_lr_route_del(struct ctl_context *ctx) + normalize_prefix_str(lr->static_routes[i]->ip_prefix); + if (!rt_prefix) { + /* Ignore existing prefix we couldn't parse. */ +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + + int ret = strcmp(prefix, rt_prefix); + free(rt_prefix); + if (ret) { +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + } +@@ -4189,13 +4091,11 @@ nbctl_lr_route_del(struct ctl_context *ctx) + normalize_prefix_str(lr->static_routes[i]->nexthop); + if (!rt_nexthop) { + /* Ignore existing nexthop we couldn't parse. */ +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + int ret = strcmp(nexthop, rt_nexthop); + free(rt_nexthop); + if (ret) { +- new_routes[n_new++] = lr->static_routes[i]; + continue; + } + } +@@ -4204,18 +4104,17 @@ nbctl_lr_route_del(struct ctl_context *ctx) + if (output_port) { + char *rt_output_port = lr->static_routes[i]->output_port; + if (!rt_output_port || strcmp(output_port, rt_output_port)) { +- new_routes[n_new++] = lr->static_routes[i]; ++ continue; + } + } +- } + +- if (n_new < lr->n_static_routes) { +- nbrec_logical_router_verify_static_routes(lr); +- nbrec_logical_router_set_static_routes(lr, new_routes, n_new); +- goto out; ++ /* Everything matched. Removing. */ ++ nbrec_logical_router_update_static_routes_delvalue( ++ lr, lr->static_routes[i]); ++ n_removed++; + } + +- if (!shash_find(&ctx->options, "--if-exists")) { ++ if (!n_removed && !shash_find(&ctx->options, "--if-exists")) { + ctl_error(ctx, "no matching route: policy '%s', prefix '%s', nexthop " + "'%s', output_port '%s'.", + policy ? policy : "any", +@@ -4224,8 +4123,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) + output_port ? output_port : "any"); + } + +-out: +- free(new_routes); + free(prefix); + free(nexthop); + } +@@ -4497,11 +4394,7 @@ nbctl_lr_nat_add(struct ctl_context *ctx) + + /* Insert the NAT into the logical router. */ + nbrec_logical_router_verify_nat(lr); +- struct nbrec_nat **new_nats = xmalloc(sizeof *new_nats * (lr->n_nat + 1)); +- nullable_memcpy(new_nats, lr->nat, sizeof *new_nats * lr->n_nat); +- new_nats[lr->n_nat] = nat; +- nbrec_logical_router_set_nat(lr, new_nats, lr->n_nat + 1); +- free(new_nats); ++ nbrec_logical_router_update_nat_addvalue(lr, nat); + + cleanup: + free(new_logical_ip); +@@ -4537,17 +4430,12 @@ nbctl_lr_nat_del(struct ctl_context *ctx) + + if (ctx->argc == 3) { + /*Deletes all NATs with the specified type. */ +- struct nbrec_nat **new_nats = xmalloc(sizeof *new_nats * lr->n_nat); +- int n_nat = 0; ++ nbrec_logical_router_verify_nat(lr); + for (size_t i = 0; i < lr->n_nat; i++) { +- if (strcmp(nat_type, lr->nat[i]->type)) { +- new_nats[n_nat++] = lr->nat[i]; ++ if (!strcmp(nat_type, lr->nat[i]->type)) { ++ nbrec_logical_router_update_nat_delvalue(lr, lr->nat[i]); + } + } +- +- nbrec_logical_router_verify_nat(lr); +- nbrec_logical_router_set_nat(lr, new_nats, n_nat); +- free(new_nats); + return; + } + +@@ -4569,13 +4457,8 @@ nbctl_lr_nat_del(struct ctl_context *ctx) + continue; + } + if (!strcmp(nat_type, nat->type) && !strcmp(nat_ip, old_ip)) { +- struct nbrec_nat **new_nats +- = xmemdup(lr->nat, sizeof *new_nats * lr->n_nat); +- new_nats[i] = lr->nat[lr->n_nat - 1]; + nbrec_logical_router_verify_nat(lr); +- nbrec_logical_router_set_nat(lr, new_nats, +- lr->n_nat - 1); +- free(new_nats); ++ nbrec_logical_router_update_nat_delvalue(lr, nat); + should_return = true; + } + free(old_ip); +@@ -4854,14 +4737,7 @@ nbctl_lrp_set_gateway_chassis(struct ctl_context *ctx) + + /* Insert the logical gateway chassis into the logical router port. */ + nbrec_logical_router_port_verify_gateway_chassis(lrp); +- struct nbrec_gateway_chassis **new_gc = xmalloc( +- sizeof *new_gc * (lrp->n_gateway_chassis + 1)); +- nullable_memcpy(new_gc, lrp->gateway_chassis, +- sizeof *new_gc * lrp->n_gateway_chassis); +- new_gc[lrp->n_gateway_chassis] = gc; +- nbrec_logical_router_port_set_gateway_chassis( +- lrp, new_gc, lrp->n_gateway_chassis + 1); +- free(new_gc); ++ nbrec_logical_router_port_update_gateway_chassis_addvalue(lrp, gc); + free(gc_name); + } + +@@ -4878,14 +4754,8 @@ remove_gc(const struct nbrec_logical_router_port *lrp, size_t idx) + * will actually cause the gateway chassis to be deleted when the + * transaction is sent to the database server (due to garbage + * collection). */ +- struct nbrec_gateway_chassis **new_gc +- = xmemdup(lrp->gateway_chassis, +- sizeof *new_gc * lrp->n_gateway_chassis); +- new_gc[idx] = new_gc[lrp->n_gateway_chassis - 1]; + nbrec_logical_router_port_verify_gateway_chassis(lrp); +- nbrec_logical_router_port_set_gateway_chassis( +- lrp, new_gc, lrp->n_gateway_chassis - 1); +- free(new_gc); ++ nbrec_logical_router_port_update_gateway_chassis_delvalue(lrp, gc); + } + + /* Delete 'gc' from the IDL. This won't have a real effect on +@@ -5118,21 +4988,15 @@ nbctl_lrp_add(struct ctl_context *ctx) + + /* Insert the logical port into the logical router. */ + nbrec_logical_router_verify_ports(lr); +- struct nbrec_logical_router_port **new_ports = xmalloc(sizeof *new_ports * +- (lr->n_ports + 1)); +- nullable_memcpy(new_ports, lr->ports, sizeof *new_ports * lr->n_ports); +- new_ports[lr->n_ports] = CONST_CAST(struct nbrec_logical_router_port *, +- lrp); +- nbrec_logical_router_set_ports(lr, new_ports, lr->n_ports + 1); +- free(new_ports); ++ nbrec_logical_router_update_ports_addvalue(lr, lrp); + + /* Updating runtime cache. */ + shash_add(&nbctx->lrp_to_lr_map, lrp->name, lr); + } + +-/* Removes logical router port 'lr->ports[idx]'. */ ++/* Removes logical router port 'lrp' from logical router 'lr'. */ + static void +-remove_lrp(struct ctl_context *ctx, size_t idx, ++remove_lrp(struct ctl_context *ctx, + const struct nbrec_logical_router *lr, + const struct nbrec_logical_router_port *lrp) + { +@@ -5144,12 +5008,8 @@ remove_lrp(struct ctl_context *ctx, size_t idx, + /* First remove 'lrp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is + * sent to the database server (due to garbage collection). */ +- struct nbrec_logical_router_port **new_ports +- = xmemdup(lr->ports, sizeof *new_ports * lr->n_ports); +- new_ports[idx] = new_ports[lr->n_ports - 1]; + nbrec_logical_router_verify_ports(lr); +- nbrec_logical_router_set_ports(lr, new_ports, lr->n_ports - 1); +- free(new_ports); ++ nbrec_logical_router_update_ports_delvalue(lr, lrp); + + /* Delete 'lrp' from the IDL. This won't have a real effect on + * the database server (the IDL will suppress it in fact) but it +@@ -5181,12 +5041,7 @@ nbctl_lrp_del(struct ctl_context *ctx) + ctx->error = error; + return; + } +- for (size_t i = 0; i < lr->n_ports; i++) { +- if (lr->ports[i] == lrp) { +- remove_lrp(ctx, i, lr, lrp); +- break; +- } +- } ++ remove_lrp(ctx, lr, lrp); + } + + /* Print a list of logical router ports. */ +@@ -5458,15 +5313,7 @@ nbctl_fwd_group_add(struct ctl_context *ctx) + nbrec_forwarding_group_set_liveness(fwd_group, true); + } + +- struct nbrec_forwarding_group **new_fwd_groups = +- xmalloc(sizeof(*new_fwd_groups) * (ls->n_forwarding_groups + 1)); +- memcpy(new_fwd_groups, ls->forwarding_groups, +- sizeof *new_fwd_groups * ls->n_forwarding_groups); +- new_fwd_groups[ls->n_forwarding_groups] = fwd_group; +- nbrec_logical_switch_set_forwarding_groups(ls, new_fwd_groups, +- (ls->n_forwarding_groups + 1)); +- free(new_fwd_groups); +- ++ nbrec_logical_switch_update_forwarding_groups_addvalue(ls, fwd_group); + } + + static void +@@ -5488,14 +5335,8 @@ nbctl_fwd_group_del(struct ctl_context *ctx) + + for (int i = 0; i < ls->n_forwarding_groups; ++i) { + if (!strcmp(ls->forwarding_groups[i]->name, fwd_group->name)) { +- struct nbrec_forwarding_group **new_fwd_groups = +- xmemdup(ls->forwarding_groups, +- sizeof *new_fwd_groups * ls->n_forwarding_groups); +- new_fwd_groups[i] = +- ls->forwarding_groups[ls->n_forwarding_groups - 1]; +- nbrec_logical_switch_set_forwarding_groups(ls, new_fwd_groups, +- (ls->n_forwarding_groups - 1)); +- free(new_fwd_groups); ++ nbrec_logical_switch_update_forwarding_groups_delvalue( ++ ls, ls->forwarding_groups[i]); + nbrec_forwarding_group_delete(fwd_group); + return; + } +@@ -6093,16 +5934,7 @@ cmd_ha_ch_grp_add_chassis(struct ctl_context *ctx) + nbrec_ha_chassis_set_priority(ha_chassis, priority); + + nbrec_ha_chassis_group_verify_ha_chassis(ha_ch_grp); +- +- struct nbrec_ha_chassis **new_ha_chs = +- xmalloc(sizeof *new_ha_chs * (ha_ch_grp->n_ha_chassis + 1)); +- nullable_memcpy(new_ha_chs, ha_ch_grp->ha_chassis, +- sizeof *new_ha_chs * ha_ch_grp->n_ha_chassis); +- new_ha_chs[ha_ch_grp->n_ha_chassis] = +- CONST_CAST(struct nbrec_ha_chassis *, ha_chassis); +- nbrec_ha_chassis_group_set_ha_chassis(ha_ch_grp, new_ha_chs, +- ha_ch_grp->n_ha_chassis + 1); +- free(new_ha_chs); ++ nbrec_ha_chassis_group_update_ha_chassis_addvalue(ha_ch_grp, ha_chassis); + } + + static void +@@ -6117,11 +5949,9 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) + + const char *chassis_name = ctx->argv[2]; + struct nbrec_ha_chassis *ha_chassis = NULL; +- size_t idx = 0; + for (size_t i = 0; i < ha_ch_grp->n_ha_chassis; i++) { + if (!strcmp(ha_ch_grp->ha_chassis[i]->chassis_name, chassis_name)) { + ha_chassis = ha_ch_grp->ha_chassis[i]; +- idx = i; + break; + } + } +@@ -6132,14 +5962,8 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) + return; + } + +- struct nbrec_ha_chassis **new_ha_ch +- = xmemdup(ha_ch_grp->ha_chassis, +- sizeof *new_ha_ch * ha_ch_grp->n_ha_chassis); +- new_ha_ch[idx] = new_ha_ch[ha_ch_grp->n_ha_chassis - 1]; + nbrec_ha_chassis_group_verify_ha_chassis(ha_ch_grp); +- nbrec_ha_chassis_group_set_ha_chassis(ha_ch_grp, new_ha_ch, +- ha_ch_grp->n_ha_chassis - 1); +- free(new_ha_ch); ++ nbrec_ha_chassis_group_update_ha_chassis_delvalue(ha_ch_grp, ha_chassis); + nbrec_ha_chassis_delete(ha_chassis); + } + +-- +2.28.0 + diff --git a/SOURCES/0004-nbctl-Remove-column-verification-for-partial-updates.patch b/SOURCES/0004-nbctl-Remove-column-verification-for-partial-updates.patch new file mode 100644 index 0000000..1231213 --- /dev/null +++ b/SOURCES/0004-nbctl-Remove-column-verification-for-partial-updates.patch @@ -0,0 +1,272 @@ +From 63807420f208364d5143dfc9246f9777e6ae934f Mon Sep 17 00:00:00 2001 +From: Ilya Maximets +Date: Fri, 11 Dec 2020 11:59:17 +0100 +Subject: [PATCH 4/7] nbctl: Remove column verification for partial updates. + +Since this is not a read-modify-write sequence, it's not strictly +necessary to verify that current value is exactly same while performing +mutations. Verification removal will allow to significantly reduce +size of ovsdb transaction, because it will no longer contain all the +data from the mutated column. + +Before this change, addition of 1 new switch port to a logical switch +with 1000 ports looked like this: + + {transact, + where: _uuid == 'logical switch row uuid' + op : wait + until: == + rows : ports ['< list of current 1000 ports >'] + + < create new lsp in Logical_Switch_Port table > + + where: _uuid == 'logical switch row uuid' + op : mutate + mutations : ports insert ['< 1 uuid of a new lsp >'] + } + +After: + + {transact, + < create new lsp in Logical_Switch_Port table > + + where: _uuid == 'logical switch row uuid' + op : mutate + mutations : ports insert ['< 1 uuid of a new lsp >'] + } + +This should relieve some pressure from the ovsdb-server since it will +not need to parse and execute this huge 'wait' operation. + +Signed-off-by: Ilya Maximets +Signed-off-by: Numan Siddique +--- + utilities/ovn-nbctl.c | 33 --------------------------------- + 1 file changed, 33 deletions(-) + +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 7c4dce12a..835161f25 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -1511,7 +1511,6 @@ nbctl_lsp_add(struct ctl_context *ctx) + } + + /* Insert the logical port into the logical switch. */ +- nbrec_logical_switch_verify_ports(ls); + nbrec_logical_switch_update_ports_addvalue(ls, lsp); + + /* Updating runtime cache. */ +@@ -1532,7 +1531,6 @@ remove_lsp(struct ctl_context *ctx, + /* First remove 'lsp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is + * sent to the database server (due to garbage collection). */ +- nbrec_logical_switch_verify_ports(ls); + nbrec_logical_switch_update_ports_delvalue(ls, lsp); + + /* Delete 'lsp' from the IDL. This won't have a real effect on the +@@ -2356,10 +2354,8 @@ nbctl_acl_add(struct ctl_context *ctx) + + /* Insert the acl into the logical switch/port group. */ + if (pg) { +- nbrec_port_group_verify_acls(pg); + nbrec_port_group_update_acls_addvalue(pg, acl); + } else { +- nbrec_logical_switch_verify_acls(ls); + nbrec_logical_switch_update_acls_addvalue(ls, acl); + } + } +@@ -2401,12 +2397,6 @@ nbctl_acl_del(struct ctl_context *ctx) + /* If priority and match are not specified, delete all ACLs with the + * specified direction. */ + if (ctx->argc == 3) { +- if (pg) { +- nbrec_port_group_verify_acls(pg); +- } else { +- nbrec_logical_switch_verify_acls(ls); +- } +- + for (size_t i = 0; i < n_acls; i++) { + if (!strcmp(direction, acls[i]->direction)) { + if (pg) { +@@ -2438,10 +2428,8 @@ nbctl_acl_del(struct ctl_context *ctx) + if (priority == acl->priority && !strcmp(ctx->argv[4], acl->match) && + !strcmp(direction, acl->direction)) { + if (pg) { +- nbrec_port_group_verify_acls(pg); + nbrec_port_group_update_acls_delvalue(pg, acl); + } else { +- nbrec_logical_switch_verify_acls(ls); + nbrec_logical_switch_update_acls_delvalue(ls, acl); + } + return; +@@ -2596,7 +2584,6 @@ nbctl_qos_add(struct ctl_context *ctx) + } + + /* Insert the qos rule the logical switch. */ +- nbrec_logical_switch_verify_qos_rules(ls); + nbrec_logical_switch_update_qos_rules_addvalue(ls, qos); + } + +@@ -2636,7 +2623,6 @@ nbctl_qos_del(struct ctl_context *ctx) + if (ctx->argc == 3) { + size_t i; + +- nbrec_logical_switch_verify_qos_rules(ls); + if (qos_rule_uuid) { + for (i = 0; i < ls->n_qos_rules; i++) { + if (uuid_equals(qos_rule_uuid, +@@ -2686,7 +2672,6 @@ nbctl_qos_del(struct ctl_context *ctx) + + if (priority == qos->priority && !strcmp(ctx->argv[4], qos->match) && + !strcmp(direction, qos->direction)) { +- nbrec_logical_switch_verify_qos_rules(ls); + nbrec_logical_switch_update_qos_rules_delvalue(ls, qos); + return; + } +@@ -3149,7 +3134,6 @@ nbctl_lr_lb_add(struct ctl_context *ctx) + } + + /* Insert the load balancer into the logical router. */ +- nbrec_logical_router_verify_load_balancer(lr); + nbrec_logical_router_update_load_balancer_addvalue(lr, new_lb); + } + +@@ -3183,7 +3167,6 @@ nbctl_lr_lb_del(struct ctl_context *ctx) + + if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { + /* Remove the matching rule. */ +- nbrec_logical_router_verify_load_balancer(lr); + nbrec_logical_router_update_load_balancer_delvalue(lr, lb); + return; + } +@@ -3258,7 +3241,6 @@ nbctl_ls_lb_add(struct ctl_context *ctx) + } + + /* Insert the load balancer into the logical switch. */ +- nbrec_logical_switch_verify_load_balancer(ls); + nbrec_logical_switch_update_load_balancer_addvalue(ls, new_lb); + } + +@@ -3292,7 +3274,6 @@ nbctl_ls_lb_del(struct ctl_context *ctx) + + if (uuid_equals(&del_lb->header_.uuid, &lb->header_.uuid)) { + /* Remove the matching rule. */ +- nbrec_logical_switch_verify_load_balancer(ls); + nbrec_logical_switch_update_load_balancer_delvalue(ls, lb); + return; + } +@@ -3721,7 +3702,6 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + nbrec_logical_router_policy_set_options(policy, &options); + smap_destroy(&options); + +- nbrec_logical_router_verify_policies(lr); + nbrec_logical_router_update_policies_addvalue(lr, policy); + if (next_hop != NULL) { + free(next_hop); +@@ -3761,7 +3741,6 @@ nbctl_lr_policy_del(struct ctl_context *ctx) + if (ctx->argc == 3) { + size_t i; + +- nbrec_logical_router_verify_policies(lr); + if (lr_policy_uuid) { + for (i = 0; i < lr->n_policies; i++) { + if (uuid_equals(lr_policy_uuid, +@@ -3796,7 +3775,6 @@ nbctl_lr_policy_del(struct ctl_context *ctx) + struct nbrec_logical_router_policy *routing_policy = lr->policies[i]; + if (priority == routing_policy->priority && + !strcmp(ctx->argv[3], routing_policy->match)) { +- nbrec_logical_router_verify_policies(lr); + nbrec_logical_router_update_policies_delvalue(lr, routing_policy); + return; + } +@@ -3996,7 +3974,6 @@ nbctl_lr_route_add(struct ctl_context *ctx) + nbrec_logical_router_static_route_set_options(route, &options); + } + +- nbrec_logical_router_verify_static_routes(lr); + nbrec_logical_router_update_static_routes_addvalue(lr, route); + + cleanup: +@@ -4055,7 +4032,6 @@ nbctl_lr_route_del(struct ctl_context *ctx) + } + + size_t n_removed = 0; +- nbrec_logical_router_verify_static_routes(lr); + for (size_t i = 0; i < lr->n_static_routes; i++) { + /* Compare route policy, if specified. */ + if (policy) { +@@ -4393,7 +4369,6 @@ nbctl_lr_nat_add(struct ctl_context *ctx) + smap_destroy(&nat_options); + + /* Insert the NAT into the logical router. */ +- nbrec_logical_router_verify_nat(lr); + nbrec_logical_router_update_nat_addvalue(lr, nat); + + cleanup: +@@ -4430,7 +4405,6 @@ nbctl_lr_nat_del(struct ctl_context *ctx) + + if (ctx->argc == 3) { + /*Deletes all NATs with the specified type. */ +- nbrec_logical_router_verify_nat(lr); + for (size_t i = 0; i < lr->n_nat; i++) { + if (!strcmp(nat_type, lr->nat[i]->type)) { + nbrec_logical_router_update_nat_delvalue(lr, lr->nat[i]); +@@ -4457,7 +4431,6 @@ nbctl_lr_nat_del(struct ctl_context *ctx) + continue; + } + if (!strcmp(nat_type, nat->type) && !strcmp(nat_ip, old_ip)) { +- nbrec_logical_router_verify_nat(lr); + nbrec_logical_router_update_nat_delvalue(lr, nat); + should_return = true; + } +@@ -4736,7 +4709,6 @@ nbctl_lrp_set_gateway_chassis(struct ctl_context *ctx) + nbrec_gateway_chassis_set_priority(gc, priority); + + /* Insert the logical gateway chassis into the logical router port. */ +- nbrec_logical_router_port_verify_gateway_chassis(lrp); + nbrec_logical_router_port_update_gateway_chassis_addvalue(lrp, gc); + free(gc_name); + } +@@ -4754,7 +4726,6 @@ remove_gc(const struct nbrec_logical_router_port *lrp, size_t idx) + * will actually cause the gateway chassis to be deleted when the + * transaction is sent to the database server (due to garbage + * collection). */ +- nbrec_logical_router_port_verify_gateway_chassis(lrp); + nbrec_logical_router_port_update_gateway_chassis_delvalue(lrp, gc); + } + +@@ -4987,7 +4958,6 @@ nbctl_lrp_add(struct ctl_context *ctx) + } + + /* Insert the logical port into the logical router. */ +- nbrec_logical_router_verify_ports(lr); + nbrec_logical_router_update_ports_addvalue(lr, lrp); + + /* Updating runtime cache. */ +@@ -5008,7 +4978,6 @@ remove_lrp(struct ctl_context *ctx, + /* First remove 'lrp' from the array of ports. This is what will + * actually cause the logical port to be deleted when the transaction is + * sent to the database server (due to garbage collection). */ +- nbrec_logical_router_verify_ports(lr); + nbrec_logical_router_update_ports_delvalue(lr, lrp); + + /* Delete 'lrp' from the IDL. This won't have a real effect on +@@ -5933,7 +5902,6 @@ cmd_ha_ch_grp_add_chassis(struct ctl_context *ctx) + nbrec_ha_chassis_set_chassis_name(ha_chassis, chassis_name); + nbrec_ha_chassis_set_priority(ha_chassis, priority); + +- nbrec_ha_chassis_group_verify_ha_chassis(ha_ch_grp); + nbrec_ha_chassis_group_update_ha_chassis_addvalue(ha_ch_grp, ha_chassis); + } + +@@ -5962,7 +5930,6 @@ cmd_ha_ch_grp_remove_chassis(struct ctl_context *ctx) + return; + } + +- nbrec_ha_chassis_group_verify_ha_chassis(ha_ch_grp); + nbrec_ha_chassis_group_update_ha_chassis_delvalue(ha_ch_grp, ha_chassis); + nbrec_ha_chassis_delete(ha_chassis); + } +-- +2.28.0 + diff --git a/SOURCES/0005-northd-Add-ECMP-support-to-router-policies.patch b/SOURCES/0005-northd-Add-ECMP-support-to-router-policies.patch new file mode 100644 index 0000000..49bb49b --- /dev/null +++ b/SOURCES/0005-northd-Add-ECMP-support-to-router-policies.patch @@ -0,0 +1,808 @@ +From c41499498db66fbe54155206a513e418d75f8d49 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Wed, 2 Dec 2020 23:57:56 +0530 +Subject: [PATCH 5/7] northd: Add ECMP support to router policies. + +A user can add a policy now like: + +ovn-nbctl lr-policy-add 100 "ip4.src == 10.0.0.4" reroute 172.0.0.5,172.0.0.6 + +We do have ECMP support for logical router static routes, but since +policies are applied after the routing stage, ECMP support for +policies is desired by ovn-kubernetes. + +A new column 'nexthops' is added to the Logical_Router_Policy table +instead of modifying the existing column 'nexthop' to preserve +backward compatibility and avoid any upgrade issues. + +Change-Id: Ib5723d1de30f0ad86ee740bb4e3b593f1cca98eb +Requested-by: Alexander Constantinescu +Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1881826 +Acked-by: Mark Michelson +Signed-off-by: Numan Siddique +--- + northd/ovn-northd.8.xml | 80 +++++++++++++++++++-- + northd/ovn-northd.c | 148 ++++++++++++++++++++++++++++++++++---- + ovn-nb.ovsschema | 6 +- + ovn-nb.xml | 18 ++++- + tests/ovn-northd.at | 124 ++++++++++++++++++++++++++++++++ + tests/ovn.at | 16 ++--- + utilities/ovn-nbctl.8.xml | 12 ++-- + utilities/ovn-nbctl.c | 73 +++++++++++++++---- + 8 files changed, 429 insertions(+), 48 deletions(-) + +diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml +index d86f36ea6..1f0f71f34 100644 +--- a/northd/ovn-northd.8.xml ++++ b/northd/ovn-northd.8.xml +@@ -3041,14 +3041,36 @@ outport = P; + +
  • +

    +- If the policy action is reroute, then the logical +- flow is added with the following actions: ++ If the policy action is reroute with 2 or more nexthops ++ defined, then the logical flow is added with the following actions: ++

    ++ ++
    ++reg8[0..15] = GID;
    ++reg8[16..31] = select(1,..n);
    ++        
    ++ ++

    ++ where GID is the ECMP group id generated by ++ ovn-northd for this policy and n ++ is the number of nexthops. select action ++ selects one of the nexthop member id, stores it in the register ++ reg8[16..31] and advances the packet to the ++ next stage. ++

    ++
  • ++ ++
  • ++

    ++ If the policy action is reroute with just one nexhop, ++ then the logical flow is added with the following actions: +

    + +
    + [xx]reg0 = H;
    + eth.src = E;
    + outport = P;
    ++reg8[0..15] = 0;
    + flags.loopback = 1;
    + next;
    +         
    +@@ -3072,7 +3094,51 @@ next; +
  • + + +-

    Ingress Table 13: ARP/ND Resolution

    ++

    Ingress Table 13: ECMP handling for router policies

    ++

    ++ This table handles the ECMP for the router policies configured ++ with multiple nexthops. ++

    ++ ++
      ++
    • ++

      ++ A priority-150 flow is added to advance the packet to the next stage ++ if the ECMP group id register reg8[0..15] is 0. ++

      ++
    • ++ ++
    • ++

      ++ For each ECMP reroute router policy with multiple nexthops, ++ a priority-100 flow is added for each nexthop H ++ with the match reg8[0..15] == GID && ++ reg8[16..31] == M where GID ++ is the router policy group id generated by ovn-northd ++ and M is the member id of the nexthop H ++ generated by ovn-northd. The following actions are added ++ to the flow: ++

      ++ ++
      ++[xx]reg0 = H;
      ++eth.src = E;
      ++outport = P
      ++"flags.loopback = 1; "
      ++"next;"
      ++        
      ++ ++

      ++ where H is the nexthop defined in the ++ router policy, E is the ethernet address of the ++ logical router port from which the nexthop is ++ reachable and P is the logical router port from ++ which the nexthop is reachable. ++

      ++
    • ++
    ++ ++

    Ingress Table 14: ARP/ND Resolution

    + +

    + Any packet that reaches this table is an IP packet whose next-hop +@@ -3258,7 +3324,7 @@ next; + + + +-

    Ingress Table 14: Check packet length

    ++

    Ingress Table 15: Check packet length

    + +

    + For distributed logical routers with distributed gateway port configured +@@ -3288,7 +3354,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; + and advances to the next table. +

    + +-

    Ingress Table 15: Handle larger packets

    ++

    Ingress Table 16: Handle larger packets

    + +

    + For distributed logical routers with distributed gateway port configured +@@ -3349,7 +3415,7 @@ icmp6 { + and advances to the next table. +

    + +-

    Ingress Table 16: Gateway Redirect

    ++

    Ingress Table 17: Gateway Redirect

    + +

    + For distributed logical routers where one of the logical router +@@ -3389,7 +3455,7 @@ icmp6 { + + + +-

    Ingress Table 17: ARP Request

    ++

    Ingress Table 18: ARP Request

    + +

    + In the common case where the Ethernet destination has been resolved, this +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index 478f1a339..dfd7d69d0 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -188,11 +188,12 @@ enum ovn_stage { + PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \ + PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \ + PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \ +- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 13, "lr_in_arp_resolve") \ +- PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 14, "lr_in_chk_pkt_len") \ +- PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 15,"lr_in_larger_pkts") \ +- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 16, "lr_in_gw_redirect") \ +- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 17, "lr_in_arp_request") \ ++ PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 13, "lr_in_policy_ecmp") \ ++ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 14, "lr_in_arp_resolve") \ ++ PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 15, "lr_in_chk_pkt_len") \ ++ PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 16, "lr_in_larger_pkts") \ ++ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 17, "lr_in_gw_redirect") \ ++ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 18, "lr_in_arp_request") \ + \ + /* Logical router egress stages. */ \ + PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ +@@ -7562,33 +7563,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, + struct ds actions = DS_EMPTY_INITIALIZER; + + if (!strcmp(rule->action, "reroute")) { ++ ovs_assert(rule->n_nexthops <= 1); ++ ++ char *nexthop = ++ (rule->n_nexthops == 1 ? rule->nexthops[0] : rule->nexthop); + struct ovn_port *out_port = get_outport_for_routing_policy_nexthop( +- od, ports, rule->priority, rule->nexthop); ++ od, ports, rule->priority, nexthop); + if (!out_port) { + return; + } + +- const char *lrp_addr_s = find_lrp_member_ip(out_port, rule->nexthop); ++ const char *lrp_addr_s = find_lrp_member_ip(out_port, nexthop); + if (!lrp_addr_s) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy " + " priority %"PRId64" nexthop %s", +- rule->priority, rule->nexthop); ++ rule->priority, nexthop); + return; + } + uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0); + if (pkt_mark) { + ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); + } +- bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false; ++ ++ bool is_ipv4 = strchr(nexthop, '.') ? true : false; + ds_put_format(&actions, "%s = %s; " + "%s = %s; " + "eth.src = %s; " + "outport = %s; " + "flags.loopback = 1; " ++ REG_ECMP_GROUP_ID" = 0; " + "next;", + is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6, +- rule->nexthop, ++ nexthop, + is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, + lrp_addr_s, + out_port->lrp_networks.ea_s, +@@ -7601,7 +7608,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, + if (pkt_mark) { + ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); + } +- ds_put_cstr(&actions, "next;"); ++ ds_put_cstr(&actions, REG_ECMP_GROUP_ID" = 0; next;"); + } + ds_put_format(&match, "%s", rule->match); + +@@ -7611,6 +7618,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, + ds_destroy(&actions); + } + ++static void ++build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od, ++ struct hmap *ports, ++ const struct nbrec_logical_router_policy *rule, ++ uint16_t ecmp_group_id) ++{ ++ ovs_assert(rule->n_nexthops > 1); ++ ++ bool nexthops_is_ipv4 = true; ++ ++ /* Check that all the nexthops belong to the same addr family before ++ * adding logical flows. */ ++ for (uint16_t i = 0; i < rule->n_nexthops; i++) { ++ bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false; ++ ++ if (i == 0) { ++ nexthops_is_ipv4 = is_ipv4; ++ } ++ ++ if (is_ipv4 != nexthops_is_ipv4) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "nexthop [%s] of the router policy with " ++ "the match [%s] do not belong to the same address " ++ "family as other next hops", ++ rule->nexthops[i], rule->match); ++ return; ++ } ++ } ++ ++ struct ds match = DS_EMPTY_INITIALIZER; ++ struct ds actions = DS_EMPTY_INITIALIZER; ++ ++ for (uint16_t i = 0; i < rule->n_nexthops; i++) { ++ struct ovn_port *out_port = get_outport_for_routing_policy_nexthop( ++ od, ports, rule->priority, rule->nexthops[i]); ++ if (!out_port) { ++ goto cleanup; ++ } ++ ++ const char *lrp_addr_s = ++ find_lrp_member_ip(out_port, rule->nexthops[i]); ++ if (!lrp_addr_s) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); ++ VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy " ++ " priority %"PRId64" nexthop %s", ++ rule->priority, rule->nexthops[i]); ++ goto cleanup; ++ } ++ ++ ds_clear(&actions); ++ uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0); ++ if (pkt_mark) { ++ ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); ++ } ++ ++ bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false; ++ ++ ds_put_format(&actions, "%s = %s; " ++ "%s = %s; " ++ "eth.src = %s; " ++ "outport = %s; " ++ "flags.loopback = 1; " ++ "next;", ++ is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6, ++ rule->nexthops[i], ++ is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, ++ lrp_addr_s, ++ out_port->lrp_networks.ea_s, ++ out_port->json_key); ++ ++ ds_clear(&match); ++ ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " ++ REG_ECMP_MEMBER_ID" == %"PRIu16, ++ ecmp_group_id, i + 1); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY_ECMP, ++ 100, ds_cstr(&match), ++ ds_cstr(&actions), &rule->header_); ++ } ++ ++ ds_clear(&actions); ++ ds_put_format(&actions, "%s = %"PRIu16 ++ "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id, ++ REG_ECMP_MEMBER_ID); ++ ++ for (uint16_t i = 0; i < rule->n_nexthops; i++) { ++ if (i > 0) { ++ ds_put_cstr(&actions, ", "); ++ } ++ ++ ds_put_format(&actions, "%"PRIu16, i + 1); ++ } ++ ds_put_cstr(&actions, ");"); ++ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY, ++ rule->priority, rule->match, ++ ds_cstr(&actions), &rule->header_); ++ ++cleanup: ++ ds_destroy(&match); ++ ds_destroy(&actions); ++} ++ + struct parsed_route { + struct ovs_list list_node; + struct in6_addr prefix; +@@ -10300,13 +10408,27 @@ build_ingress_policy_flows_for_lrouter( + if (od->nbr) { + /* This is a catch-all rule. It has the lowest priority (0) + * does a match-all("1") and pass-through (next) */ +- ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", ++ REG_ECMP_GROUP_ID" = 0; next;"); ++ ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150, ++ REG_ECMP_GROUP_ID" == 0", "next;"); + + /* Convert routing policies to flows. */ ++ uint16_t ecmp_group_id = 1; + for (int i = 0; i < od->nbr->n_policies; i++) { + const struct nbrec_logical_router_policy *rule + = od->nbr->policies[i]; +- build_routing_policy_flow(lflows, od, ports, rule, &rule->header_); ++ bool is_ecmp_reroute = ++ (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1); ++ ++ if (is_ecmp_reroute) { ++ build_ecmp_routing_policy_flows(lflows, od, ports, rule, ++ ecmp_group_id); ++ ecmp_group_id++; ++ } else { ++ build_routing_policy_flow(lflows, od, ports, rule, ++ &rule->header_); ++ } + } + } + } +diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema +index af77dd138..b77a2308c 100644 +--- a/ovn-nb.ovsschema ++++ b/ovn-nb.ovsschema +@@ -1,7 +1,7 @@ + { + "name": "OVN_Northbound", +- "version": "5.29.0", +- "cksum": "328602112 27064", ++ "version": "5.30.0", ++ "cksum": "3273824429 27172", + "tables": { + "NB_Global": { + "columns": { +@@ -391,6 +391,8 @@ + "key": {"type": "string", + "enum": ["set", ["allow", "drop", "reroute"]]}}}, + "nexthop": {"type": {"key": "string", "min": 0, "max": 1}}, ++ "nexthops": {"type": { ++ "key": "string", "min": 0, "max": "unlimited"}}, + "options": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, +diff --git a/ovn-nb.xml b/ovn-nb.xml +index e7a8d6833..0cf043790 100644 +--- a/ovn-nb.xml ++++ b/ovn-nb.xml +@@ -2723,18 +2723,34 @@ + + +

  • +- reroute: Reroute packet to . ++ reroute: Reroute packet to or ++ . +
  • + + + + ++

    ++ Note: This column is deprecated in favor of . ++

    +

    + Next-hop IP address for this route, which should be the IP + address of a connected router port or the IP address of a logical port. +

    +
    + ++ ++

    ++ Next-hop ECMP IP addresses for this route. Each IP in the list should ++ be the IP address of a connected router port or the IP address of a ++ logical port. ++

    ++ ++

    ++ One IP from the list is selected as next hop. ++

    ++
    ++ + +

    + Marks the packet with the value specified when the router policy +diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at +index 50a4cae76..ce6c44db4 100644 +--- a/tests/ovn-northd.at ++++ b/tests/ovn-northd.at +@@ -2198,3 +2198,127 @@ dnl Number of common flows should be the same. + check_row_count Logical_Flow ${n_flows_common} logical_dp_group=${dp_group_uuid} + + AT_CLEANUP ++ ++AT_SETUP([ovn -- Router policies - ECMP reroute]) ++AT_KEYWORDS([router policies ecmp reroute]) ++ovn_start ++ ++check ovn-nbctl ls-add sw0 ++check ovn-nbctl lsp-add sw0 sw0-port1 ++check ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3" ++ ++check ovn-nbctl ls-add sw1 ++check ovn-nbctl lsp-add sw1 sw1-port1 ++check ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3" ++ ++# Create a logical router and attach both logical switches ++check ovn-nbctl lr-add lr0 ++check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64 ++check ovn-nbctl lsp-add sw0 sw0-lr0 ++check ovn-nbctl lsp-set-type sw0-lr0 router ++check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 ++check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 ++ ++check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64 ++check ovn-nbctl lsp-add sw1 sw1-lr0 ++check ovn-nbctl lsp-set-type sw1-lr0 router ++check ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 ++check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr-sw1 ++ ++check ovn-nbctl ls-add public ++check ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 ++check ovn-nbctl lsp-add public public-lr0 ++check ovn-nbctl lsp-set-type public-lr0 router ++check ovn-nbctl lsp-set-addresses public-lr0 router ++check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public ++ ++check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.3" reroute 172.168.0.101,172.168.0.102 ++ ++ovn-sbctl dump-flows lr0 > lr0flows3 ++AT_CAPTURE_FILE([lr0flows3]) ++ ++AT_CHECK([grep "lr_in_policy" lr0flows3 | sort], [0], [dnl ++ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = 0; next;) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == 0), action=(next;) ++]) ++ ++check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.4" reroute 172.168.0.101,172.168.0.102,172.168.0.103 ++ovn-sbctl dump-flows lr0 > lr0flows3 ++AT_CAPTURE_FILE([lr0flows3]) ++ ++AT_CHECK([grep "lr_in_policy" lr0flows3 | \ ++sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ ++sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl ++ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2);) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2, 3);) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) ++]) ++ ++check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.5" reroute 172.168.0.110 ++ovn-sbctl dump-flows lr0 > lr0flows3 ++AT_CAPTURE_FILE([lr0flows3]) ++ ++AT_CHECK([grep "lr_in_policy" lr0flows3 | \ ++sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ ++sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl ++ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2);) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2, 3);) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = ; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) ++]) ++ ++check ovn-nbctl --wait=sb lr-policy-del lr0 10 "ip4.src == 10.0.0.3" ++ovn-sbctl dump-flows lr0 > lr0flows3 ++AT_CAPTURE_FILE([lr0flows3]) ++ ++AT_CHECK([grep "lr_in_policy" lr0flows3 | \ ++sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ ++sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl ++ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2, 3);) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = ; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ++ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) ++]) ++ ++check ovn-nbctl --wait=sb lr-policy-del lr0 10 "ip4.src == 10.0.0.4" ++ovn-sbctl dump-flows lr0 > lr0flows3 ++AT_CAPTURE_FILE([lr0flows3]) ++ ++AT_CHECK([grep "lr_in_policy" lr0flows3 | \ ++sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ ++sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl ++ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) ++ table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = ; next;) ++ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) ++]) ++ ++check ovn-nbctl --wait=sb add logical_router_policy . nexthops "2000\:\:b" ++ovn-sbctl dump-flows lr0 > lr0flows3 ++AT_CAPTURE_FILE([lr0flows3]) ++ ++AT_CHECK([grep "lr_in_policy" lr0flows3 | \ ++sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ ++sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl ++ table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) ++ table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) ++]) ++ ++AT_CLEANUP +diff --git a/tests/ovn.at b/tests/ovn.at +index 2e0bc9c53..66088a7f5 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -21156,31 +21156,31 @@ AT_CHECK([ + + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl + table=12(lr_in_policy ), priority=1001 , dnl +-match=(ip6), action=(pkt.mark = 4294967295; next;) ++match=(ip6), action=(pkt.mark = 4294967295; reg8[[0..15]] = 0; next;) + ]) + + ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=-1 + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl + table=12(lr_in_policy ), priority=1001 , dnl +-match=(ip6), action=(next;) ++match=(ip6), action=(reg8[[0..15]] = 0; next;) + ]) + + ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=2147483648 + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl + table=12(lr_in_policy ), priority=1001 , dnl +-match=(ip6), action=(pkt.mark = 2147483648; next;) ++match=(ip6), action=(pkt.mark = 2147483648; reg8[[0..15]] = 0; next;) + ]) + + ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=foo + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl + table=12(lr_in_policy ), priority=1001 , dnl +-match=(ip6), action=(next;) ++match=(ip6), action=(reg8[[0..15]] = 0; next;) + ]) + + ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=4294967296 + AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl + table=12(lr_in_policy ), priority=1001 , dnl +-match=(ip6), action=(next;) ++match=(ip6), action=(reg8[[0..15]] = 0; next;) + ]) + + OVN_CLEANUP([hv1]) +@@ -21759,7 +21759,7 @@ AT_CHECK([ + grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) + ]) + AT_CHECK([ +- test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=21 | \ ++ test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=22 | \ + grep "priority=200" | \ + grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) + ]) +@@ -21770,7 +21770,7 @@ AT_CHECK([ + grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) + ]) + AT_CHECK([ +- test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=21 | \ ++ test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=22 | \ + grep "priority=200" | \ + grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) + ]) +@@ -22208,7 +22208,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep "actions=controller" | grep + ]) + + # The packet should've been dropped in the lr_in_arp_resolve stage. +-AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=21, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl ++AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=22, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl + 1 + ]) + +diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml +index e5a35f307..e6fec9980 100644 +--- a/utilities/ovn-nbctl.8.xml ++++ b/utilities/ovn-nbctl.8.xml +@@ -739,7 +739,7 @@ +

    +
    [--may-exist]lr-policy-add + router priority match +- action [nexthop] ++ action [nexthop[,nexthop,...]] + [options key=value]]
    +
    +

    +@@ -748,10 +748,12 @@ + are similar to OVN ACLs, but exist on the logical-router. Reroute + policies are needed for service-insertion and service-chaining. + nexthop is an optional parameter. It needs to be provided +- only when action is reroute. A policy is +- uniquely identified by priority and match. +- Multiple policies can have the same priority. +- options sets the router policy options as key-value pair. ++ only when action is reroute. Multiple ++ nexthops can be specified for ECMP routing. ++ A policy is uniquely identified by priority and ++ match. Multiple policies can have the same ++ priority. options sets the router policy ++ options as key-value pair. + The supported option is : pkt_mark. +

    + +diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c +index 835161f25..94e7eedeb 100644 +--- a/utilities/ovn-nbctl.c ++++ b/utilities/ovn-nbctl.c +@@ -766,7 +766,7 @@ Route commands:\n\ + lr-route-list ROUTER print routes for ROUTER\n\ + \n\ + Policy commands:\n\ +- lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \ ++ lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP,[NEXTHOP,...]] \ + [OPTIONS KEY=VALUE ...] \n\ + add a policy to router\n\ + lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\ +@@ -3634,7 +3634,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + return; + } + const char *action = ctx->argv[4]; +- char *next_hop = NULL; ++ size_t n_nexthops = 0; ++ char **nexthops = NULL; + + bool reroute = false; + /* Validate action. */ +@@ -3654,7 +3655,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + /* Check if same routing policy already exists. + * A policy is uniquely identified by priority and match */ + bool may_exist = !!shash_find(&ctx->options, "--may-exist"); +- for (int i = 0; i < lr->n_policies; i++) { ++ size_t i; ++ for (i = 0; i < lr->n_policies; i++) { + const struct nbrec_logical_router_policy *policy = lr->policies[i]; + if (policy->priority == priority && + !strcmp(policy->match, ctx->argv[3])) { +@@ -3665,12 +3667,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + return; + } + } ++ + if (reroute) { +- next_hop = normalize_prefix_str(ctx->argv[5]); +- if (!next_hop) { +- ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); +- return; ++ char *nexthops_arg = xstrdup(ctx->argv[5]); ++ char *save_ptr, *next_hop, *token; ++ ++ n_nexthops = 0; ++ size_t n_allocs = 0; ++ ++ bool nexthops_is_ipv4 = true; ++ for (token = strtok_r(nexthops_arg, ",", &save_ptr); ++ token != NULL; token = strtok_r(NULL, ",", &save_ptr)) { ++ next_hop = normalize_addr_str(token); ++ ++ if (!next_hop) { ++ ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); ++ free(nexthops_arg); ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); ++ } ++ free(nexthops); ++ return; ++ } ++ if (n_nexthops == n_allocs) { ++ nexthops = x2nrealloc(nexthops, &n_allocs, sizeof *nexthops); ++ } ++ ++ bool is_ipv4 = strchr(next_hop, '.') ? true : false; ++ if (n_nexthops == 0) { ++ nexthops_is_ipv4 = is_ipv4; ++ } ++ ++ if (is_ipv4 != nexthops_is_ipv4) { ++ ctl_error(ctx, "bad next hops argument, not in the same " ++ "addr family : %s", ctx->argv[5]); ++ free(nexthops_arg); ++ free(next_hop); ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); ++ } ++ free(nexthops); ++ return; ++ } ++ nexthops[n_nexthops] = next_hop; ++ n_nexthops++; + } ++ ++ free(nexthops_arg); + } + + struct nbrec_logical_router_policy *policy; +@@ -3679,12 +3722,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + nbrec_logical_router_policy_set_match(policy, ctx->argv[3]); + nbrec_logical_router_policy_set_action(policy, action); + if (reroute) { +- nbrec_logical_router_policy_set_nexthop(policy, next_hop); ++ nbrec_logical_router_policy_set_nexthops( ++ policy, (const char **)nexthops, n_nexthops); + } + + /* Parse the options. */ + struct smap options = SMAP_INITIALIZER(&options); +- for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) { ++ for (i = reroute ? 6 : 5; i < ctx->argc; i++) { + char *key, *value; + value = xstrdup(ctx->argv[i]); + key = strsep(&value, "="); +@@ -3694,7 +3738,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + ctl_error(ctx, "No value specified for the option : %s", key); + smap_destroy(&options); + free(key); +- free(next_hop); ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); ++ } ++ free(nexthops); + return; + } + free(key); +@@ -3703,9 +3750,11 @@ nbctl_lr_policy_add(struct ctl_context *ctx) + smap_destroy(&options); + + nbrec_logical_router_update_policies_addvalue(lr, policy); +- if (next_hop != NULL) { +- free(next_hop); ++ ++ for (i = 0; i < n_nexthops; i++) { ++ free(nexthops[i]); + } ++ free(nexthops); + } + + static void +-- +2.28.0 + diff --git a/SOURCES/0006-osx-Fix-compilation-error.patch b/SOURCES/0006-osx-Fix-compilation-error.patch new file mode 100644 index 0000000..0de35f6 --- /dev/null +++ b/SOURCES/0006-osx-Fix-compilation-error.patch @@ -0,0 +1,69 @@ +From befea4cc873c25574a62ead84cc4413fcbf23619 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Tue, 15 Dec 2020 17:30:17 +0530 +Subject: [PATCH 6/7] osx: Fix compilation error. + +The commit "northd: Add ECMP support to router policies." introduced +compilaton error on osx platform. This patch fixes the issue. + +The errors are: + +---- +northd/ovn-northd.c:7697:38: error: format specifies type 'unsigned short' +but the argument has type 'int' [-Werror,-Wformat] + ecmp_group_id, i + 1); + ^~~~~ +northd/ovn-northd.c:7713:44: error: format specifies type 'unsigned short' +but the argument has type 'int' [-Werror,-Wformat] + ds_put_format(&actions, "%"PRIu16, i + 1); + ~~~~~~~~ ^~~~~ +---- + +Fixes: 35b00c7e7990("northd: Add ECMP support to router policies.") +Suggested-by: Dumitru Ceara +Acked-by: Dumitru Ceara +Signed-off-by: Numan Siddique +--- + northd/ovn-northd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c +index dfd7d69d0..b377dffa1 100644 +--- a/northd/ovn-northd.c ++++ b/northd/ovn-northd.c +@@ -7650,7 +7650,7 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od, + struct ds match = DS_EMPTY_INITIALIZER; + struct ds actions = DS_EMPTY_INITIALIZER; + +- for (uint16_t i = 0; i < rule->n_nexthops; i++) { ++ for (size_t i = 0; i < rule->n_nexthops; i++) { + struct ovn_port *out_port = get_outport_for_routing_policy_nexthop( + od, ports, rule->priority, rule->nexthops[i]); + if (!out_port) { +@@ -7690,7 +7690,7 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od, + + ds_clear(&match); + ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " +- REG_ECMP_MEMBER_ID" == %"PRIu16, ++ REG_ECMP_MEMBER_ID" == %"PRIuSIZE, + ecmp_group_id, i + 1); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY_ECMP, + 100, ds_cstr(&match), +@@ -7702,12 +7702,12 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od, + "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id, + REG_ECMP_MEMBER_ID); + +- for (uint16_t i = 0; i < rule->n_nexthops; i++) { ++ for (size_t i = 0; i < rule->n_nexthops; i++) { + if (i > 0) { + ds_put_cstr(&actions, ", "); + } + +- ds_put_format(&actions, "%"PRIu16, i + 1); ++ ds_put_format(&actions, "%"PRIuSIZE, i + 1); + } + ds_put_cstr(&actions, ");"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY, +-- +2.28.0 + diff --git a/SOURCES/0007-tests-Make-ovn-ovn-controller-incremental-processing.patch b/SOURCES/0007-tests-Make-ovn-ovn-controller-incremental-processing.patch new file mode 100644 index 0000000..eccf032 --- /dev/null +++ b/SOURCES/0007-tests-Make-ovn-ovn-controller-incremental-processing.patch @@ -0,0 +1,179 @@ +From df617604c7b1f366601bcec6fd0c752f8a52977c Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Wed, 16 Dec 2020 13:59:02 +0100 +Subject: [PATCH 7/7] tests: Make "ovn -- ovn-controller incremental + processing" more reliable. + +Relax the full recompute checks as changes to tunnel interfaces, e.g. due +to BFD state changes, are not processed incrementally and cause full +recomputes. On slower systems (like in CI) this can happen more often. + +Signed-off-by: Dumitru Ceara +Signed-off-by: Mark Michelson +Acked-by: Ilya Maximets +--- + tests/ovn-performance.at | 111 +++++++++++++++++++-------------------- + 1 file changed, 53 insertions(+), 58 deletions(-) + +diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at +index 6cc5b2174..e510c6cef 100644 +--- a/tests/ovn-performance.at ++++ b/tests/ovn-performance.at +@@ -232,37 +232,32 @@ AT_SETUP([ovn -- ovn-controller incremental processing]) + + ovn_start + net_add n1 +-for i in 1 2; do ++for i in `seq 1 5`; do + sim_add hv$i + as hv$i + ovs-vsctl add-br br-phys + ovn_attach n1 br-phys 192.168.0.$i +-done +- +-for i in 1 2 3; do +- sim_add gw$i +- as gw$i +- ovs-vsctl add-br br-phys +- ovs-vsctl add-br br-ex +- ovs-vsctl set open . external_ids:ovn-bridge-mappings="public:br-ex" +- j=$((i + 2)) +- ovn_attach n1 br-phys 192.168.0.$j +- ip link add vgw$i type dummy +- ovs-vsctl add-port br-ex vgw$i ++ if [[ $i -ge 3 ]] ; then ++ ovs-vsctl add-br br-ex ++ ovs-vsctl set open . external_ids:ovn-bridge-mappings="public:br-ex" ++ ip link add vgw$i type dummy ++ ovs-vsctl add-port br-ex vgw$i ++ fi + done + + # Wait for the tunnel ports to be created and up. + # Otherwise this may affect the lflow_run count. ++for i in `seq 1 5`; do ++ for j in `seq 1 5`; do ++ if [[ $i -ne $j ]] ; then ++ OVS_WAIT_UNTIL([ ++ test $(as hv$i ovs-vsctl list interface ovn-hv$j-0 | \ ++ grep -c tunnel_egress_iface_carrier=up) -eq 1 ++ ]) ++ fi ++ done ++done + +-OVS_WAIT_UNTIL([ +- test $(as hv1 ovs-vsctl list interface ovn-hv2-0 | \ +-grep tunnel_egress_iface_carrier=up | wc -l) -eq 1 +-]) +- +-OVS_WAIT_UNTIL([ +- test $(as hv2 ovs-vsctl list interface ovn-hv1-0 | \ +-grep tunnel_egress_iface_carrier=up | wc -l) -eq 1 +-]) + + # Add router lr1 + OVN_CONTROLLER_EXPECT_NO_HIT( +@@ -463,63 +458,63 @@ OVN_CONTROLLER_EXPECT_NO_HIT( + ) + + OVN_CONTROLLER_EXPECT_HIT_COND( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], [=0 =0 >0 =0 =0], +- [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public gw1 30 && ovn-nbctl --wait=hv sync] ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], [=0 =0 >0 =0 =0], ++ [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv3 30 && ovn-nbctl --wait=hv sync] + ) + +-# After this, BFD should be enabled from hv1 and hv2 to gw1. +-# So there should be lflow_run hits in hv1, hv2, gw1 and gw2 ++# After this, BFD should be enabled from hv1 and hv2 to hv3. ++# So there should be lflow_run hits in hv1, hv2, hv3 and hv4 + OVN_CONTROLLER_EXPECT_HIT_COND( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], [>0 >0 >0 >0 =0], +- [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public gw2 20 && ovn-nbctl --wait=hv sync] +-) +- +-OVN_CONTROLLER_EXPECT_HIT( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], +- [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public gw3 10 && ovn-nbctl --wait=hv sync] +-) +- +-# create QoS rule +-OVN_CONTROLLER_EXPECT_NO_HIT( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], +- [ovn-nbctl --wait=hv set Logical_Switch_Port ln-public options:qos_burst=1000] ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], [>0 >0 >0 >0 =0], ++ [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv4 20 && ovn-nbctl --wait=hv sync] + ) + + OVN_CONTROLLER_EXPECT_HIT( +- [gw1], [lflow_run], +- [as gw1 ovs-vsctl set interface vgw1 external-ids:ovn-egress-iface=true] ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], ++ [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv5 10 && ovn-nbctl --wait=hv sync] + ) + +-# Make gw2 master. There is remote possibility that full recompute +-# triggers for gw2 after it becomes master. Most of the time +-# there will be no recompute. +-ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public gw2 40 +-gw2_ch=$(ovn-sbctl --bare --columns _uuid list chassis gw2) +-OVS_WAIT_UNTIL([ovn-sbctl find port_binding logical_port=cr-lr1-public chassis=$gw2_ch]) ++# Make hv4 master. There is remote possibility that full recompute ++# triggers for hv1-hv5 after hv4 becomes master because of updates to the ++# ovn-hv$i-0 interfaces. Most of the time there will be no recompute. ++ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv4 40 ++hv4_ch=$(ovn-sbctl --bare --columns _uuid list chassis hv4) ++OVS_WAIT_UNTIL([ovn-sbctl find port_binding logical_port=cr-lr1-public chassis=$hv4_ch]) + + OVN_CONTROLLER_EXPECT_HIT_COND( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], [=0 =0 =0 >=0 =0], ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], [>=0 >=0 >=0 >=0 >=0], + [ovn-nbctl --wait=hv sync] + ) + +-# Delete gw2 from gateway chassis ++# Delete hv4 from gateway chassis + OVN_CONTROLLER_EXPECT_HIT( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], +- [ovn-nbctl --wait=hv lrp-del-gateway-chassis lr1-public gw2 && ovn-nbctl --wait=hv sync] ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], ++ [ovn-nbctl --wait=hv lrp-del-gateway-chassis lr1-public hv4 && ovn-nbctl --wait=hv sync] + ) + +-# Delete gw1 from gateway chassis +-# After this, the BFD should be disabled entirely as gw3 is the ++# Delete hv3 from gateway chassis ++# After this, the BFD should be disabled entirely as hv5 is the + # only gateway chassis. + OVN_CONTROLLER_EXPECT_HIT_COND( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], [>0 >0 >0 =0 >0], +- [ovn-nbctl --wait=hv lrp-del-gateway-chassis lr1-public gw1] ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], [>0 >0 >0 =0 >0], ++ [ovn-nbctl --wait=hv lrp-del-gateway-chassis lr1-public hv3] + ) + +-# Delete gw3 from gateway chassis. There should be no lflow_run. ++# Delete hv5 from gateway chassis. There should be no lflow_run. + OVN_CONTROLLER_EXPECT_NO_HIT( +- [hv1 hv2 gw1 gw2 gw3], [lflow_run], +- [ovn-nbctl --wait=hv lrp-del-gateway-chassis lr1-public gw3] ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], ++ [ovn-nbctl --wait=hv lrp-del-gateway-chassis lr1-public hv5] ++) ++ ++# create QoS rule ++OVN_CONTROLLER_EXPECT_NO_HIT( ++ [hv1 hv2 hv3 hv4 hv5], [lflow_run], ++ [ovn-nbctl --wait=hv set Logical_Switch_Port ln-public options:qos_burst=1000] ++) ++ ++OVN_CONTROLLER_EXPECT_HIT( ++ [hv3], [lflow_run], ++ [as hv3 ovs-vsctl set interface vgw3 external-ids:ovn-egress-iface=true] + ) + + for i in 1 2; do +-- +2.28.0 + diff --git a/SPECS/ovn2.13.spec b/SPECS/ovn2.13.spec index 97b0686..1d3e6c6 100644 --- a/SPECS/ovn2.13.spec +++ b/SPECS/ovn2.13.spec @@ -16,7 +16,7 @@ %define pkgver 2.13 %define pkgname ovn%{pkgver} -%define upstreamver 20.09 +%define upstreamver 20.12 #%%global commit0 7886ac9ed807d6ff942edde624a3f9331da7332a #%%global date 20200217 @@ -60,7 +60,7 @@ Summary: Open Virtual Network support Group: System Environment/Daemons URL: http://www.openvswitch.org/ Version: %{upstreamver}.0 -Release: 17%{?commit0:.%{date}git%{shortcommit0}}%{?dist} +Release: 1%{?commit0:.%{date}git%{shortcommit0}}%{?dist} Provides: openvswitch%{pkgver}-ovn-common = %{?epoch:%{epoch}:}%{version}-%{release} Obsoletes: openvswitch%{pkgver}-ovn-common < 2.11.0-1 @@ -78,7 +78,8 @@ Source: https://www.openvswitch.org/releases/ovn-%{version}.tar.gz %endif -%define ovsver %{pkgver}.0 +#%define ovsver %{pkgver}.0 +%define ovsver 2.14.90 %if 0%{?commit1:1} Source10: https://github.com/openvswitch/ovs/archive/%{commit1}.tar.gz#/openvswitch-%{shortcommit1}.tar.gz @@ -106,102 +107,19 @@ Source505: ppc_64-power8-linuxapp-gcc-config Source506: x86_64-native-linuxapp-gcc-config # ovn-patches - -# OVN (including OVS if required) backports (0 - 399) - -# Bug 1845109 -Patch001: 0001-ovn-nbctl-add-may-exist-if-exists-options-for-policy.patch - -# Bug 1886314 -Patch010: 0001-ovn-northd-Add-localnet-ports-to-Multicast_Groups-cr.patch - -# Bug 1865866 -Patch020: 0001-northd-properly-reconfigure-ipam-when-subnet-is-chan.patch - -# Bug 1871931 -Patch030: 0001-ofctrl.c-Fix-duplicated-flow-handling-in-I-P-while-m.patch -Patch031: 0002-ofctrl.c-Avoid-repeatedly-linking-an-installed-flow-.patch -Patch032: 0003-ofctrl.c-Only-merge-actions-for-conjunctive-flows.patch -Patch033: 0004-ofctrl.c-Do-not-change-flow-ordering-when-merging-op.patch -Patch034: 0005-ofctrl.c-Simplify-active-desired-flow-selection.patch -Patch035: 0006-ofctrl.c-Always-log-the-most-recent-flow-changes.patch -Patch036: 0007-ofctrl.c-Add-a-predictable-resolution-for-conflictin.patch - -# Bug 1826686 -Patch040: 0001-controller-IPv6-Prefix-Delegation-introduce-RENEW-RE.patch - -# Bug 1876990 -Patch050: 0001-northd-Use-enum-ovn_stage-for-the-table-value-in-the.patch -Patch051: 0002-ovn-trace-Don-t-assert-for-next-stage-ingress.patch -Patch052: 0003-actions-Add-a-new-OVN-action-reject.patch -Patch053: 0004-ovn-northd-Optimize-logical-flow-generation-for-reje.patch -Patch054: 0005-ovn-trace-Handle-IPv6-packets-for-tcp_reset-action.patch - -# Bug 1856898 -Patch060: 0001-ovn-northd-Handle-IPv6-addresses-with-prefixes-for-p.patch - -# Bug 1890803 -Patch070: 0001-ovn-detrace-Only-decode-br-int-OVS-interfaces.patch -Patch071: 0002-ovn-detrace-Improve-DB-connection-error-messages.patch - -# Bug 1765506 -Patch080: 0001-dhcp-add-iPXE-support-to-OVN.patch - -# Bug 1894478 -Patch090: 0001-pinctrl-Directly-update-MAC_Bindings-created-by-self.patch -Patch091: 0002-ovn-northd-Limit-self-originated-ARP-ND-broadcast-do.patch - -# Bug 1896671 -Patch100: 0001-northd-Don-t-poll-ovsdb-before-the-connection-is-ful.patch - -# Bug 1846018 -Patch110: 0001-Allow-VLAN-traffic-when-LS-vlan-passthru-true.patch - -# Bug 1888445 -Patch120: 0001-northd-Fix-lb_action-when-there-are-no-active-backen.patch - -# Bug 1833373 -Patch130: 0001-ovn-nbctl-ovn-sbctl-Add-convenient-names-for-more-ta.patch -Patch131: 0002-northd-Move-functions-from-ovn-northd.c-into-ovn-uti.patch -Patch132: 0003-tests-Introduce-new-testing-helpers.patch -Patch133: 0004-Add-new-table-Load_Balancer-in-Southbound-database.patch -Patch134: 0005-northd-Refactor-load-balancer-vip-parsing.patch -Patch135: 0006-controller-Add-load-balancer-hairpin-OF-flows.patch -Patch136: 0007-actions-Add-new-actions-chk_lb_hairpin-chk_lb_hairpi.patch -Patch137: 0008-northd-Make-use-of-new-hairpin-actions.patch -Patch138: 0009-ovn-detrace-Add-SB-Load-Balancer-cookier-handler.patch -Patch139: 0010-sbctl-Add-Load-Balancer-support-for-vflows-option.patch - -# Bug 1899936 and memory leak fixes. -Patch140: 0001-Provide-the-option-to-pin-ovn-controller-and-ovn-nor.patch -Patch141: 0002-controller-Allow-pinctrl-thread-to-handle-packet-ins.patch -Patch142: 0003-northd-Fix-leaks-of-strings-while-formatting-ecmp-fl.patch -Patch143: 0004-test-ovn-Fix-expression-leak.patch -Patch144: 0005-actions-Fix-leak-of-child-ports-in-fwd-group.patch -Patch145: 0006-actions-Fix-leak-of-select-group-members.patch -Patch146: 0007-ofctrl-Fix-leak-of-meter-mod-bands.patch -Patch147: 0008-pinctrl-Fix-leak-of-DNS-cache-records.patch -Patch148: 0009-ovn-controller-Fix-leak-of-pending-ct-zones.patch -Patch149: 0010-ovn-nbctl-Fix-error-leak-on-duplicated-switch-port.patch -Patch150: 0011-northd-Fix-leak-of-dynamic-string-for-fwd-group-port.patch -Patch151: 0012-actions-Fix-leak-of-dynamic-string-on-fwd-group-enco.patch -Patch152: 0013-ovn-nbctl-Fix-leak-of-IPs-while-configuring-NAT.patch -Patch153: 0014-ovn-nbctl-Fix-IP-leak-on-router-NAT-addition-failure.patch -Patch154: 0015-ovn-nbctl-Fix-IP-leak-on-failure-of-lr-policy-additi.patch -Patch155: 0016-ovn-nbctl-Fix-leak-of-array-of-new-policies.patch - -# Bug 1900484 -Patch160: 0001-Fix-OVN-update-issue-when-ovn-controller-is-updated-.patch +# OVN backports (0 - 799) + +# Bug 1883957 +# Bug 1881826 +Patch01: 0001-northd-add-reject-action-for-lb-with-no-backends.patch +Patch02: 0002-nbctl-Cache-to-which-switch-or-router-particular-por.patch +Patch03: 0003-nbctl-Use-partial-set-updates-instead-of-re-setting-.patch +Patch04: 0004-nbctl-Remove-column-verification-for-partial-updates.patch +Patch05: 0005-northd-Add-ECMP-support-to-router-policies.patch +Patch06: 0006-osx-Fix-compilation-error.patch +Patch07: 0007-tests-Make-ovn-ovn-controller-incremental-processing.patch # OpenvSwitch backports (800-) if required. -Patch800: 0001-Revert-ovsdb-idl-Avoid-sending-redundant-conditional.patch -Patch801: 0002-ovsdb-idl-Try-committing-the-pending-txn-in-ovsdb_id.patch - -# Bug 1808580 -Patch802: 0003-ovsdb-idl-Avoid-inconsistent-IDL-state-with-OVSDB_MO.patch - -# Bug 1829762 -Patch803: 0004-ovsdb-idl-Add-function-to-reset-min_index.patch # FIXME Sphinx is used to generate some manpages, unfortunately, on RHEL, it's # in the -optional repository and so we can't require it directly since RHV @@ -645,6 +563,31 @@ fi %{_unitdir}/ovn-controller-vtep.service %changelog +* Fri Dec 18 2020 Numan Siddique - 20.12.0-1 +- Rebase to OVN v20.12.0. +- Re-backport patches for #1883957 and #1881826 as there are not in v20.12.0. +- Use ovs sources from master commit - 252e1e576443("dpdk: Update to use DPDK v20.11."). + +* Tue Dec 15 2020 Numan Siddique - 20.09.0-23 +- Backport "northd: Add ECMP support to router policies." (#1881826) +- Backport "Add missing documentation for router policy and ecmp sym reply stage." (#1881826) + +* Mon Dec 14 2020 Lorenzo Bianconi - 20.09.0-22 +- Backport "northd: add reject action for lb with no backends" (#1883957) + +* Tue Dec 8 2020 Lorenzo Bianconi - 20.09.0-21 +- Backport "northd: Fix iteration over vip backends." (#1904489) + +* Tue Dec 1 2020 Dumitru Ceara - 20.09.0-20 +- Backport "pinctrl: Honor always_learn_from_arp_request for self created MAC_Bindings." (#1903199) + +* Mon Nov 30 2020 Mark Michelson - 20.09.0-19 +- Backport "Allow explicit setting of the SNAT zone on a gateway router" (#1892311) +- Backport "Clear port binding flows when datapath CT zone changes." + +* Thu Nov 26 2020 Dumitru Ceara - 20.09.0-18 +* Backport "pinctrl: Fix segfault seen when creating mac_binding for local GARPs." (#1901880) + * Mon Nov 23 2020 Numan Siddique - 20.09.0-17 - Backport "Fix OVN update issue when ovn-controller is updated first from 20.06 to 20.09. (#1900484)