From ddbdb15a4b6050d9c667ca2bc546a118e208a342 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Thu, 30 Jul 2020 23:18:58 -0700 Subject: [PATCH 22/22] Avoid nb_cfg update notification flooding nb_cfg as a mechanism to "ping" the OVN control plane is very useful in many ways. However, the current implementation floods the whole control plane with update notifications. Each HV writes its nb_cfg number to the SB, and all these updates are notified to all the other HVs, which is O(n^2). Although updates are batched into fewer than n^2 notifications, this still generates significant load on the SB DB and ovn-controllers. To solve this problem and make the mechanism more useful in large-scale production deployments, this patch separates the per-HV *private* data (written only by the owning chassis and not interesting to any other HVs) from the Chassis table into a separate table, so that each HV can conditionally monitor and get updates only for its own record. Test results show a great improvement: In a test environment with 1200 sandbox HVs, and 12K ports created on 80 lswitches and 1 lrouter, do the sync test when the system is idle, with command: time ovn-nbctl --wait=hv sync Original result: real 0m13.724s user 0m0.295s sys 0m0.012s With this patch: real 0m3.255s user 0m0.248s sys 0m0.020s Also, regarding backwards compatibility, note that the nb_cfg from the Chassis table is no longer updated. If any system is relying on this mechanism, it should start using the nb_cfg from the Chassis_Private table from now on. 
Change-Id: I9be2449f3317ff6b91d9afc8f53a9caa8e14c062 Co-authored-by: Lucas Alvares Gomes Signed-off-by: Lucas Alvares Gomes Signed-off-by: Han Zhou Acked-by: Dumitru Ceara (cherry-picked from upstream master commit 4adc10f58127e45b5883f2e7cb1c702720b95043) --- controller/chassis.c | 30 ++++++++++++++++++++---- controller/chassis.h | 8 +++++-- controller/ovn-controller.c | 42 ++++++++++++++++++++++++++++----- lib/chassis-index.c | 26 +++++++++++++++++++++ lib/chassis-index.h | 6 +++++ northd/ovn-northd.c | 46 +++++++++++++++++++++++++++++++------ ovn-sb.ovsschema | 17 ++++++++++++-- ovn-sb.xml | 42 +++++++++++++++++++++++++++++---- tests/ovn-controller.at | 26 +++++++++++++++++++++ 9 files changed, 218 insertions(+), 25 deletions(-) diff --git a/controller/chassis.c b/controller/chassis.c index bdf3fb950..6ac591e02 100644 --- a/controller/chassis.c +++ b/controller/chassis.c @@ -621,14 +621,18 @@ chassis_update(const struct sbrec_chassis *chassis_rec, const struct sbrec_chassis * chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, struct ovsdb_idl_index *sbrec_chassis_by_name, + struct ovsdb_idl_index *sbrec_chassis_private_by_name, const struct ovsrec_open_vswitch_table *ovs_table, const struct sbrec_chassis_table *chassis_table, const char *chassis_id, const struct ovsrec_bridge *br_int, - const struct sset *transport_zones) + const struct sset *transport_zones, + const struct sbrec_chassis_private **chassis_private) { struct ovs_chassis_cfg ovs_cfg; + *chassis_private = NULL; + /* Get the chassis config from the ovs table. */ ovs_chassis_cfg_init(&ovs_cfg); if (!chassis_parse_ovs_config(ovs_table, br_int, &ovs_cfg)) { @@ -655,6 +659,18 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, !existed ? 
"registering" : "updating", chassis_id); } + + const struct sbrec_chassis_private *chassis_private_rec = + chassis_private_lookup_by_name(sbrec_chassis_private_by_name, + chassis_id); + if (!chassis_private_rec && ovnsb_idl_txn) { + chassis_private_rec = sbrec_chassis_private_insert(ovnsb_idl_txn); + sbrec_chassis_private_set_name(chassis_private_rec, + chassis_id); + sbrec_chassis_private_set_chassis(chassis_private_rec, + chassis_rec); + } + *chassis_private = chassis_private_rec; } ovs_chassis_cfg_destroy(&ovs_cfg); @@ -710,16 +726,22 @@ chassis_get_mac(const struct sbrec_chassis *chassis_rec, * required. */ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, - const struct sbrec_chassis *chassis_rec) + const struct sbrec_chassis *chassis_rec, + const struct sbrec_chassis_private *chassis_private_rec) { - if (!chassis_rec) { + if (!chassis_rec && !chassis_private_rec) { return true; } if (ovnsb_idl_txn) { ovsdb_idl_txn_add_comment(ovnsb_idl_txn, "ovn-controller: unregistering chassis '%s'", chassis_rec->name); - sbrec_chassis_delete(chassis_rec); + if (chassis_rec) { + sbrec_chassis_delete(chassis_rec); + } + if (chassis_private_rec) { + sbrec_chassis_private_delete(chassis_private_rec); + } } return false; } diff --git a/controller/chassis.h b/controller/chassis.h index 178d2957e..81055b403 100644 --- a/controller/chassis.h +++ b/controller/chassis.h @@ -17,6 +17,7 @@ #define OVN_CHASSIS_H 1 #include +#include "lib/ovn-sb-idl.h" struct ovsdb_idl; struct ovsdb_idl_index; @@ -33,12 +34,15 @@ void chassis_register_ovs_idl(struct ovsdb_idl *); const struct sbrec_chassis *chassis_run( struct ovsdb_idl_txn *ovnsb_idl_txn, struct ovsdb_idl_index *sbrec_chassis_by_name, + struct ovsdb_idl_index *sbrec_chassis_private_by_name, const struct ovsrec_open_vswitch_table *, const struct sbrec_chassis_table *, const char *chassis_id, const struct ovsrec_bridge *br_int, - const struct sset *transport_zones); + const struct sset *transport_zones, + const struct 
sbrec_chassis_private **chassis_private); bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, - const struct sbrec_chassis *); + const struct sbrec_chassis *, + const struct sbrec_chassis_private *); bool chassis_get_mac(const struct sbrec_chassis *chassis, const char *bridge_mapping, struct eth_addr *chassis_mac); diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 67b3cd989..933acf676 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -155,6 +155,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, struct ovsdb_idl_condition ce = OVSDB_IDL_CONDITION_INIT(&ce); struct ovsdb_idl_condition ip_mcast = OVSDB_IDL_CONDITION_INIT(&ip_mcast); struct ovsdb_idl_condition igmp = OVSDB_IDL_CONDITION_INIT(&igmp); + struct ovsdb_idl_condition chprv = OVSDB_IDL_CONDITION_INIT(&chprv); if (monitor_all) { ovsdb_idl_condition_add_clause_true(&pb); @@ -165,6 +166,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, ovsdb_idl_condition_add_clause_true(&ce); ovsdb_idl_condition_add_clause_true(&ip_mcast); ovsdb_idl_condition_add_clause_true(&igmp); + ovsdb_idl_condition_add_clause_true(&chprv); goto out; } @@ -196,7 +198,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, &chassis->header_.uuid); sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ, &chassis->header_.uuid); + + /* Monitors Chassis_Private record for current chassis only */ + sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ, + chassis->name); + } else { + /* During initialization, we monitor all records in Chassis_Private so + * that we don't try to recreate existing ones. 
*/ + ovsdb_idl_condition_add_clause_true(&chprv); } + if (local_ifaces) { const char *name; SSET_FOR_EACH (name, local_ifaces) { @@ -229,6 +240,7 @@ out: sbrec_controller_event_set_condition(ovnsb_idl, &ce); sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast); sbrec_igmp_group_set_condition(ovnsb_idl, &igmp); + sbrec_chassis_private_set_condition(ovnsb_idl, &chprv); ovsdb_idl_condition_destroy(&pb); ovsdb_idl_condition_destroy(&lf); ovsdb_idl_condition_destroy(&mb); @@ -237,6 +249,7 @@ out: ovsdb_idl_condition_destroy(&ce); ovsdb_idl_condition_destroy(&ip_mcast); ovsdb_idl_condition_destroy(&igmp); + ovsdb_idl_condition_destroy(&chprv); } static const char * @@ -2090,6 +2103,8 @@ main(int argc, char *argv[]) struct ovsdb_idl_index *sbrec_chassis_by_name = chassis_index_create(ovnsb_idl_loop.idl); + struct ovsdb_idl_index *sbrec_chassis_private_by_name + = chassis_private_index_create(ovnsb_idl_loop.idl); struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath = mcast_group_index_create(ovnsb_idl_loop.idl); struct ovsdb_idl_index *sbrec_logical_flow_by_logical_datapath @@ -2118,7 +2133,8 @@ main(int argc, char *argv[]) = igmp_group_index_create(ovnsb_idl_loop.idl); ovsdb_idl_track_add_all(ovnsb_idl_loop.idl); - ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg); + ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, + &sbrec_chassis_private_col_nb_cfg); /* Omit the external_ids column of all the tables except for - * - DNS. 
pinctrl.c uses the external_ids column of DNS, @@ -2155,6 +2171,10 @@ main(int argc, char *argv[]) * other_config column so we no longer need to monitor it */ ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_external_ids); + /* Do not monitor Chassis_Private external_ids */ + ovsdb_idl_omit(ovnsb_idl_loop.idl, + &sbrec_chassis_private_col_external_ids); + update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL, false); stopwatch_create(CONTROLLER_LOOP_STOPWATCH_NAME, SW_MS); @@ -2361,10 +2381,13 @@ main(int argc, char *argv[]) process_br_int(ovs_idl_txn, bridge_table, ovs_table); const char *chassis_id = get_ovs_chassis_id(ovs_table); const struct sbrec_chassis *chassis = NULL; + const struct sbrec_chassis_private *chassis_private = NULL; if (chassis_id) { chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name, + sbrec_chassis_private_by_name, ovs_table, chassis_table, chassis_id, - br_int, &transport_zones); + br_int, &transport_zones, + &chassis_private); } if (br_int) { @@ -2489,10 +2512,10 @@ main(int argc, char *argv[]) engine_set_force_recompute(false); } - if (ovnsb_idl_txn && chassis) { + if (ovnsb_idl_txn && chassis_private) { int64_t cur_cfg = ofctrl_get_cur_cfg(); - if (cur_cfg && cur_cfg != chassis->nb_cfg) { - sbrec_chassis_set_nb_cfg(chassis, cur_cfg); + if (cur_cfg && cur_cfg != chassis_private->nb_cfg) { + sbrec_chassis_private_set_nb_cfg(chassis_private, cur_cfg); } } @@ -2595,10 +2618,17 @@ main(int argc, char *argv[]) ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id) : NULL); + const struct sbrec_chassis_private *chassis_private + = (chassis_id + ? chassis_private_lookup_by_name( + sbrec_chassis_private_by_name, chassis_id) + : NULL); + /* Run all of the cleanup functions, even if one of them returns * false. We're done if all of them return true. 
*/ done = binding_cleanup(ovnsb_idl_txn, port_binding_table, chassis); - done = chassis_cleanup(ovnsb_idl_txn, chassis) && done; + done = chassis_cleanup(ovnsb_idl_txn, + chassis, chassis_private) && done; done = encaps_cleanup(ovs_idl_txn, br_int) && done; done = igmp_group_cleanup(ovnsb_idl_txn, sbrec_igmp_group) && done; if (done) { diff --git a/lib/chassis-index.c b/lib/chassis-index.c index 39066f4cc..13120fe3e 100644 --- a/lib/chassis-index.c +++ b/lib/chassis-index.c @@ -40,6 +40,32 @@ chassis_lookup_by_name(struct ovsdb_idl_index *sbrec_chassis_by_name, return retval; } +struct ovsdb_idl_index * +chassis_private_index_create(struct ovsdb_idl *idl) +{ + return ovsdb_idl_index_create1(idl, + &sbrec_chassis_private_col_name); +} + +/* Finds and returns the chassis with the given 'name', or NULL if no such + * chassis exists. */ +const struct sbrec_chassis_private * +chassis_private_lookup_by_name( + struct ovsdb_idl_index *sbrec_chassis_private_by_name, + const char *name) +{ + struct sbrec_chassis_private *target = + sbrec_chassis_private_index_init_row(sbrec_chassis_private_by_name); + sbrec_chassis_private_index_set_name(target, name); + + struct sbrec_chassis_private *retval = sbrec_chassis_private_index_find( + sbrec_chassis_private_by_name, target); + + sbrec_chassis_private_index_destroy_row(target); + + return retval; +} + struct ovsdb_idl_index * ha_chassis_group_index_create(struct ovsdb_idl *idl) { diff --git a/lib/chassis-index.h b/lib/chassis-index.h index 302e5f0fd..b9b331f34 100644 --- a/lib/chassis-index.h +++ b/lib/chassis-index.h @@ -23,6 +23,12 @@ struct ovsdb_idl_index *chassis_index_create(struct ovsdb_idl *); const struct sbrec_chassis *chassis_lookup_by_name( struct ovsdb_idl_index *sbrec_chassis_by_name, const char *name); +struct ovsdb_idl_index *chassis_private_index_create(struct ovsdb_idl *); + +const struct sbrec_chassis_private * +chassis_private_lookup_by_name( + struct ovsdb_idl_index *sbrec_chassis_private_by_name, const char 
*name); + struct ovsdb_idl_index *ha_chassis_group_index_create(struct ovsdb_idl *idl); const struct sbrec_ha_chassis_group *ha_chassis_group_lookup_by_name( struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, const char *name); diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index fc05accde..c83f9d5c2 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -12024,6 +12024,11 @@ static const char *rbac_chassis_update[] = {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches", "other_config"}; +static const char *rbac_chassis_private_auth[] = + {"name"}; +static const char *rbac_chassis_private_update[] = + {"nb_cfg", "chassis"}; + static const char *rbac_encap_auth[] = {"chassis_name"}; static const char *rbac_encap_update[] = @@ -12061,6 +12066,14 @@ static struct rbac_perm_cfg { .update = rbac_chassis_update, .n_update = ARRAY_SIZE(rbac_chassis_update), .row = NULL + },{ + .table = "Chassis_Private", + .auth = rbac_chassis_private_auth, + .n_auth = ARRAY_SIZE(rbac_chassis_private_auth), + .insdel = true, + .update = rbac_chassis_private_update, + .n_update = ARRAY_SIZE(rbac_chassis_private_update), + .row = NULL },{ .table = "Encap", .auth = rbac_encap_auth, @@ -12230,12 +12243,23 @@ update_northbound_cfg(struct northd_context *ctx, /* Update northbound hv_cfg if appropriate. */ if (nbg) { /* Find minimum nb_cfg among all chassis. */ - const struct sbrec_chassis *chassis; + const struct sbrec_chassis_private *chassis_priv; int64_t hv_cfg = nbg->nb_cfg; - SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) { - if (!smap_get_bool(&chassis->other_config, "is-remote", false) && - chassis->nb_cfg < hv_cfg) { - hv_cfg = chassis->nb_cfg; + SBREC_CHASSIS_PRIVATE_FOR_EACH (chassis_priv, ctx->ovnsb_idl) { + const struct sbrec_chassis *chassis = chassis_priv->chassis; + if (chassis) { + if (smap_get_bool(&chassis->other_config, + "is-remote", false)) { + /* Skip remote chassises. 
*/ + continue; + } + } else { + VLOG_WARN("Chassis not exist for Chassis_Private record, " + "name: %s", chassis_priv->name); + } + + if (chassis_priv->nb_cfg < hv_cfg) { + hv_cfg = chassis_priv->nb_cfg; } } @@ -12248,7 +12272,8 @@ update_northbound_cfg(struct northd_context *ctx, /* Handle a fairly small set of changes in the southbound database. */ static void -ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop, +ovnsb_db_run(struct northd_context *ctx, + struct ovsdb_idl_loop *sb_loop, struct hmap *ports) { if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) { @@ -12529,10 +12554,17 @@ main(int argc, char *argv[]) ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_burst_size); ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis); - ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg); ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name); ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_other_config); + ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis_private); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, + &sbrec_chassis_private_col_name); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, + &sbrec_chassis_private_col_chassis); + ovsdb_idl_add_column(ovnsb_idl_loop.idl, + &sbrec_chassis_private_col_nb_cfg); + ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_ha_chassis); add_column_noalert(ovnsb_idl_loop.idl, &sbrec_ha_chassis_col_chassis); diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema index 99c5de822..3af76540a 100644 --- a/ovn-sb.ovsschema +++ b/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", - "version": "2.8.2", - "cksum": "464326363 21916", + "version": "2.9.0", + "cksum": "223619766 22548", "tables": { "SB_Global": { "columns": { @@ -46,6 +46,19 @@ "max": "unlimited"}}}, "isRoot": true, "indexes": [["name"]]}, + "Chassis_Private": { + "columns": { + "name": {"type": "string"}, + "chassis": {"type": {"key": {"type": "uuid", + "refTable": 
"Chassis", + "refType": "weak"}, + "min": 0, "max": 1}}, + "nb_cfg": {"type": {"key": "integer"}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "isRoot": true, + "indexes": [["name"]]}, "Encap": { "columns": { "type": {"type": {"key": { diff --git a/ovn-sb.xml b/ovn-sb.xml index a74d9c3ea..59b21711b 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -256,10 +256,8 @@ - Sequence number for the configuration. When ovn-controller - updates the configuration of a chassis from the contents of the - southbound database, it copies - from the table into this column. + Deprecated. This column is replaced by the column of the table. @@ -366,6 +364,42 @@ + +

+ Each row in this table maintains per-chassis private data that are + accessed only by the owning chassis (write only) and ovn-northd, not by + any other chassis. These data are stored in this separate table instead + of the table for performance considerations: + the rows in this table can be conditionally monitored by chassises so + that each chassis only gets update notifications for its own row, to avoid + unnecessary chassis private data update flooding in a large-scale + deployment. +

+ + + The name of the chassis that owns these chassis-private data. + + + + The reference to table for the chassis that owns + these chassis-private data. + + + + Sequence number for the configuration. When ovn-controller + updates the configuration of a chassis from the contents of the + southbound database, it copies + from the table into this column. + + + + The overall purpose of these columns is described under Common + Columns at the beginning of this document. + + + +
+

The column in the