From 483c2ccd6276542082bf0b07d2a19cc9cc21c879 Mon Sep 17 00:00:00 2001 From: Open vSwitch CI Date: Nov 10 2021 01:26:51 +0000 Subject: Import openvswitch2.16-2.16.0-27 from Fast DataPath --- diff --git a/SOURCES/openvswitch-2.16.0.patch b/SOURCES/openvswitch-2.16.0.patch index 75d73e4..bb19a71 100644 --- a/SOURCES/openvswitch-2.16.0.patch +++ b/SOURCES/openvswitch-2.16.0.patch @@ -2319,6 +2319,45 @@ index c4075cdae3..6d7be066b6 100644 } /* Sort and check constraints. */ +diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c +index 126d16a2f5..e6d866182c 100644 +--- a/ovsdb/ovsdb.c ++++ b/ovsdb/ovsdb.c +@@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage) + ovs_list_init(&db->triggers); + db->run_triggers_now = db->run_triggers = false; + ++ db->n_atoms = 0; ++ + db->is_relay = false; + ovs_list_init(&db->txn_forward_new); + hmap_init(&db->txn_forward_sent); +@@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) + } + + simap_increase(usage, "cells", cells); ++ simap_increase(usage, "atoms", db->n_atoms); ++ simap_increase(usage, "txn-history", db->n_txn_history); ++ simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms); + + if (db->storage) { + ovsdb_storage_get_memory_usage(db->storage, usage); +diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h +index 4a7bd0f0ec..ec2d235ec2 100644 +--- a/ovsdb/ovsdb.h ++++ b/ovsdb/ovsdb.h +@@ -90,8 +90,11 @@ struct ovsdb { + /* History trasanctions for incremental monitor transfer. */ + bool need_txn_history; /* Need to maintain history of transactions. */ + unsigned int n_txn_history; /* Current number of history transactions. */ ++ unsigned int n_txn_history_atoms; /* Total number of atoms in history. */ + struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */ + ++ size_t n_atoms; /* Total number of ovsdb atoms in the database. */ ++ + /* Relay mode. */ + bool is_relay; /* True, if database is in relay mode. */ + /* List that holds transactions waiting to be forwarded to the server. */ diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c index 26d39a087f..30760233ee 100644 --- a/ovsdb/raft-private.c @@ -2825,6 +2864,198 @@ index 2986027c90..ff411675f0 100644 if (!strcmp(name, column->name)) { return true; +diff --git a/ovsdb/row.c b/ovsdb/row.c +index 65a0546211..e83c60a218 100644 +--- a/ovsdb/row.c ++++ b/ovsdb/row.c +@@ -38,8 +38,7 @@ allocate_row(const struct ovsdb_table *table) + struct ovsdb_row *row = xmalloc(row_size); + row->table = CONST_CAST(struct ovsdb_table *, table); + row->txn_row = NULL; +- ovs_list_init(&row->src_refs); +- ovs_list_init(&row->dst_refs); ++ hmap_init(&row->dst_refs); + row->n_refs = 0; + return row; + } +@@ -61,6 +60,78 @@ ovsdb_row_create(const struct ovsdb_table *table) + return row; + } + ++static struct ovsdb_weak_ref * ++ovsdb_weak_ref_clone(struct ovsdb_weak_ref *src) ++{ ++ struct ovsdb_weak_ref *weak = xzalloc(sizeof *weak); ++ ++ hmap_node_nullify(&weak->dst_node); ++ ovs_list_init(&weak->src_node); ++ weak->src_table = src->src_table; ++ weak->src = src->src; ++ weak->dst_table = src->dst_table; ++ weak->dst = src->dst; ++ ovsdb_atom_clone(&weak->key, &src->key, src->type.key.type); ++ if (src->type.value.type != OVSDB_TYPE_VOID) { ++ ovsdb_atom_clone(&weak->value, &src->value, src->type.value.type); ++ } ++ ovsdb_type_clone(&weak->type, &src->type); ++ weak->column_idx = src->column_idx; ++ weak->by_key = src->by_key; ++ return weak; ++} ++ ++uint32_t ++ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *weak) ++{ ++ return uuid_hash(&weak->src); ++} ++ ++static bool ++ovsdb_weak_ref_equals(const struct ovsdb_weak_ref *a, ++ const struct ovsdb_weak_ref *b) ++{ ++ if (a == b) { ++ return true; ++ } ++ return a->src_table == b->src_table ++ && a->dst_table == b->dst_table ++ && uuid_equals(&a->src, &b->src) ++ && uuid_equals(&a->dst, &b->dst) ++ && a->column_idx == b->column_idx ++ && a->by_key == b->by_key ++ && ovsdb_atom_equals(&a->key, &b->key, a->type.key.type); ++} ++ ++struct ovsdb_weak_ref * ++ovsdb_row_find_weak_ref(const struct ovsdb_row *row, ++ const struct ovsdb_weak_ref *ref) ++{ ++ struct ovsdb_weak_ref *weak; ++ HMAP_FOR_EACH_WITH_HASH (weak, dst_node, ++ ovsdb_weak_ref_hash(ref), &row->dst_refs) { ++ if (ovsdb_weak_ref_equals(weak, ref)) { ++ return weak; ++ } ++ } ++ return NULL; ++} ++ ++void ++ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *weak) ++{ ++ if (!weak) { ++ return; ++ } ++ ovs_assert(ovs_list_is_empty(&weak->src_node)); ++ ovsdb_atom_destroy(&weak->key, weak->type.key.type); ++ if (weak->type.value.type != OVSDB_TYPE_VOID) { ++ ovsdb_atom_destroy(&weak->value, weak->type.value.type); ++ } ++ ovsdb_type_destroy(&weak->type); ++ free(weak); ++} ++ + struct ovsdb_row * + ovsdb_row_clone(const struct ovsdb_row *old) + { +@@ -75,6 +146,13 @@ ovsdb_row_clone(const struct ovsdb_row *old) + &old->fields[column->index], + &column->type); + } ++ ++ struct ovsdb_weak_ref *weak, *clone; ++ HMAP_FOR_EACH (weak, dst_node, &old->dst_refs) { ++ clone = ovsdb_weak_ref_clone(weak); ++ hmap_insert(&new->dst_refs, &clone->dst_node, ++ ovsdb_weak_ref_hash(clone)); ++ } + return new; + } + +@@ -85,20 +163,13 @@ ovsdb_row_destroy(struct ovsdb_row *row) + { + if (row) { + const struct ovsdb_table *table = row->table; +- struct ovsdb_weak_ref *weak, *next; ++ struct ovsdb_weak_ref *weak; + const struct shash_node *node; + +- LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) { +- ovs_list_remove(&weak->src_node); +- ovs_list_remove(&weak->dst_node); +- free(weak); +- } +- +- LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) { +- ovs_list_remove(&weak->src_node); +- ovs_list_remove(&weak->dst_node); +- free(weak); ++ HMAP_FOR_EACH_POP (weak, dst_node, &row->dst_refs) { ++ ovsdb_weak_ref_destroy(weak); + } ++ hmap_destroy(&row->dst_refs); + + SHASH_FOR_EACH (node, &table->schema->columns) { + const struct ovsdb_column *column = node->data; +diff --git a/ovsdb/row.h b/ovsdb/row.h +index 394ac8eb49..fe04555d0c 100644 +--- a/ovsdb/row.h ++++ b/ovsdb/row.h +@@ -36,11 +36,28 @@ struct ovsdb_column_set; + * ovsdb_weak_ref" structures are created for them. + */ + struct ovsdb_weak_ref { +- struct ovs_list src_node; /* In src->src_refs list. */ +- struct ovs_list dst_node; /* In destination row's dst_refs list. */ +- struct ovsdb_row *src; /* Source row. */ +- struct ovsdb_table *dst_table; /* Destination table. */ ++ struct hmap_node dst_node; /* In ovsdb_row's 'dst_refs' hmap. */ ++ struct ovs_list src_node; /* In txn_row's 'deleted/added_refs'. */ ++ ++ struct ovsdb_table *src_table; /* Source row table. */ ++ struct uuid src; /* Source row uuid. */ ++ ++ struct ovsdb_table *dst_table; /* Destination row table. */ + struct uuid dst; /* Destination row uuid. */ ++ ++ /* Source row's key-value pair that created this reference. ++ * This information is needed in order to find and delete the reference ++ * from the source row. We need both key and value in order to avoid ++ * accidential deletion of an updated data, i.e. if value in datum got ++ * updated and the reference was created by the old value. ++ * Storing column index in order to remove references from the correct ++ * column. 'by_key' flag allows to distinguish 2 references in a corner ++ * case where key and value are the same. */ ++ union ovsdb_atom key; ++ union ovsdb_atom value; ++ struct ovsdb_type type; /* Datum type of the key-value pair. */ ++ unsigned int column_idx; /* Row column index for this pair. */ ++ bool by_key; /* 'true' if reference is a 'key'. */ + }; + + /* A row in a database table. */ +@@ -50,8 +67,7 @@ struct ovsdb_row { + struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */ + + /* Weak references. Updated and checked only at transaction commit. */ +- struct ovs_list src_refs; /* Weak references from this row. */ +- struct ovs_list dst_refs; /* Weak references to this row. */ ++ struct hmap dst_refs; /* Weak references to this row. */ + + /* Number of strong refs to this row from other rows, in this table or + * other tables, through 'uuid' columns that have a 'refTable' constraint +@@ -69,6 +85,12 @@ struct ovsdb_row { + * index 'i' is contained in hmap table->indexes[i]. */ + }; + ++uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *); ++struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *, ++ const struct ovsdb_weak_ref *); ++void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *); ++ ++ + struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *); + struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *); + void ovsdb_row_destroy(struct ovsdb_row *); diff --git a/ovsdb/storage.c b/ovsdb/storage.c index d727b1eacd..9e32efe582 100644 --- a/ovsdb/storage.c @@ -2841,10 +3072,55 @@ index d727b1eacd..9e32efe582 100644 return NULL; } else if (json->type != JSON_ARRAY || json->array.n != 2) { diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c -index 8ffefcf7c9..dcccc61c05 100644 +index 8ffefcf7c9..88e0528002 100644 --- a/ovsdb/transaction.c +++ b/ovsdb/transaction.c -@@ -266,9 +266,9 @@ ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, +@@ -41,6 +41,9 @@ struct ovsdb_txn { + struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */ + struct ds comment; + struct uuid txnid; /* For clustered mode only. It is the eid. */ ++ size_t n_atoms; /* Number of atoms in all transaction rows. */ ++ ssize_t n_atoms_diff; /* Difference between number of added and ++ * removed atoms. */ + }; + + /* A table modified by a transaction. */ +@@ -86,6 +89,10 @@ struct ovsdb_txn_row { + struct uuid uuid; + struct ovsdb_table *table; + ++ /* Weak refs that needs to be added/deleted to/from destination rows. */ ++ struct ovs_list added_refs; ++ struct ovs_list deleted_refs; ++ + /* Used by for_each_txn_row(). */ + unsigned int serial; /* Serial number of in-progress commit. */ + +@@ -151,6 +158,23 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED, + } else { + hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node); + } ++ ++ struct ovsdb_weak_ref *weak, *next; ++ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { ++ ovs_list_remove(&weak->src_node); ++ ovs_list_init(&weak->src_node); ++ if (hmap_node_is_null(&weak->dst_node)) { ++ ovsdb_weak_ref_destroy(weak); ++ } ++ } ++ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { ++ ovs_list_remove(&weak->src_node); ++ ovs_list_init(&weak->src_node); ++ if (hmap_node_is_null(&weak->dst_node)) { ++ ovsdb_weak_ref_destroy(weak); ++ } ++ } ++ + ovsdb_row_destroy(new); + free(txn_row); + +@@ -266,9 +290,9 @@ ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, static struct ovsdb_error * OVS_WARN_UNUSED_RESULT ovsdb_txn_adjust_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, @@ -2856,7 +3132,7 @@ index 8ffefcf7c9..dcccc61c05 100644 struct ovsdb_error *error; error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.key, -@@ -291,14 +291,39 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r) +@@ -291,14 +315,39 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r) struct ovsdb_error *error; if (bitmap_is_set(r->changed, column->index)) { @@ -2901,6 +3177,429 @@ index 8ffefcf7c9..dcccc61c05 100644 if (error) { return error; } +@@ -459,93 +508,125 @@ static struct ovsdb_error * + ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, + struct ovsdb_txn_row *txn_row) + { +- struct ovsdb_weak_ref *weak, *next; ++ struct ovsdb_weak_ref *weak, *next, *dst_weak; ++ struct ovsdb_row *dst_row; + +- /* Remove the weak references originating in the old version of the row. */ +- if (txn_row->old) { +- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->old->src_refs) { +- ovs_list_remove(&weak->src_node); +- ovs_list_remove(&weak->dst_node); +- free(weak); ++ /* Find and clean up deleted references from destination rows. */ ++ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { ++ dst_row = CONST_CAST(struct ovsdb_row *, ++ ovsdb_table_get_row(weak->dst_table, &weak->dst)); ++ if (dst_row) { ++ dst_weak = ovsdb_row_find_weak_ref(dst_row, weak); ++ hmap_remove(&dst_row->dst_refs, &dst_weak->dst_node); ++ ovs_assert(ovs_list_is_empty(&dst_weak->src_node)); ++ ovsdb_weak_ref_destroy(dst_weak); ++ } ++ ovs_list_remove(&weak->src_node); ++ ovs_list_init(&weak->src_node); ++ if (hmap_node_is_null(&weak->dst_node)) { ++ ovsdb_weak_ref_destroy(weak); + } + } + +- /* Although the originating rows have the responsibility of updating the +- * weak references in the dst, it is possible that some source rows aren't +- * part of the transaction. In that situation this row needs to move the +- * list of incoming weak references from the old row into the new one. +- */ +- if (txn_row->old && txn_row->new) { +- /* Move the incoming weak references from old to new. */ +- ovs_list_push_back_all(&txn_row->new->dst_refs, +- &txn_row->old->dst_refs); +- } +- +- /* Insert the weak references originating in the new version of the row. */ +- struct ovsdb_row *dst_row; +- if (txn_row->new) { +- LIST_FOR_EACH (weak, src_node, &txn_row->new->src_refs) { +- /* dst_row MUST exist. */ +- dst_row = CONST_CAST(struct ovsdb_row *, ++ /* Insert the weak references added in the new version of the row. */ ++ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { ++ dst_row = CONST_CAST(struct ovsdb_row *, + ovsdb_table_get_row(weak->dst_table, &weak->dst)); +- ovs_list_insert(&dst_row->dst_refs, &weak->dst_node); +- } ++ ++ ovs_assert(!ovsdb_row_find_weak_ref(dst_row, weak)); ++ hmap_insert(&dst_row->dst_refs, &weak->dst_node, ++ ovsdb_weak_ref_hash(weak)); ++ ovs_list_remove(&weak->src_node); ++ ovs_list_init(&weak->src_node); + } + + return NULL; + } + + static void +-add_weak_ref(const struct ovsdb_row *src_, const struct ovsdb_row *dst_) ++add_weak_ref(struct ovsdb_txn_row *txn_row, const struct ovsdb_row *dst_, ++ struct ovs_list *ref_list, ++ const union ovsdb_atom *key, const union ovsdb_atom *value, ++ bool by_key, const struct ovsdb_column *column) + { +- struct ovsdb_row *src = CONST_CAST(struct ovsdb_row *, src_); + struct ovsdb_row *dst = CONST_CAST(struct ovsdb_row *, dst_); + struct ovsdb_weak_ref *weak; + +- if (src == dst) { ++ if (txn_row->new == dst) { + return; + } + +- if (!ovs_list_is_empty(&dst->dst_refs)) { +- /* Omit duplicates. */ +- weak = CONTAINER_OF(ovs_list_back(&dst->dst_refs), +- struct ovsdb_weak_ref, dst_node); +- if (weak->src == src) { +- return; +- } +- } +- +- weak = xmalloc(sizeof *weak); +- weak->src = src; ++ weak = xzalloc(sizeof *weak); ++ weak->src_table = txn_row->new->table; ++ weak->src = *ovsdb_row_get_uuid(txn_row->new); + weak->dst_table = dst->table; + weak->dst = *ovsdb_row_get_uuid(dst); +- /* The dst_refs list is updated at commit time. */ +- ovs_list_init(&weak->dst_node); +- ovs_list_push_back(&src->src_refs, &weak->src_node); ++ ovsdb_type_clone(&weak->type, &column->type); ++ ovsdb_atom_clone(&weak->key, key, column->type.key.type); ++ if (column->type.value.type != OVSDB_TYPE_VOID) { ++ ovsdb_atom_clone(&weak->value, value, column->type.value.type); ++ } ++ weak->by_key = by_key; ++ weak->column_idx = column->index; ++ hmap_node_nullify(&weak->dst_node); ++ ovs_list_push_back(ref_list, &weak->src_node); ++} ++ ++static void ++find_and_add_weak_ref(struct ovsdb_txn_row *txn_row, ++ const union ovsdb_atom *key, ++ const union ovsdb_atom *value, ++ const struct ovsdb_column *column, ++ bool by_key, struct ovs_list *ref_list, ++ struct ovsdb_datum *not_found, bool *zero) ++{ ++ const struct ovsdb_row *row = by_key ++ ? ovsdb_table_get_row(column->type.key.uuid.refTable, &key->uuid) ++ : ovsdb_table_get_row(column->type.value.uuid.refTable, &value->uuid); ++ ++ if (row) { ++ add_weak_ref(txn_row, row, ref_list, key, value, by_key, column); ++ } else if (not_found) { ++ if (uuid_is_zero(by_key ? &key->uuid : &value->uuid)) { ++ *zero = true; ++ } ++ ovsdb_datum_add_unsafe(not_found, key, value, &column->type, NULL); ++ } + } + + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT + assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) + { ++ struct ovsdb_weak_ref *weak, *next; + struct ovsdb_table *table; + struct shash_node *node; + + if (txn_row->old && !txn_row->new) { + /* Mark rows that have weak references to 'txn_row' as modified, so +- * that their weak references will get reassessed. */ +- struct ovsdb_weak_ref *weak, *next; +- +- LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) { +- if (!weak->src->txn_row) { +- ovsdb_txn_row_modify(txn, weak->src); ++ * that their weak references will get reassessed. Adding all weak ++ * refs to 'deleted_ref' lists of their source rows, so they will be ++ * cleaned up from datums and deleted on commit. */ ++ ++ HMAP_FOR_EACH (weak, dst_node, &txn_row->old->dst_refs) { ++ struct ovsdb_txn_row *src_txn_row; ++ ++ src_txn_row = find_or_make_txn_row(txn, weak->src_table, ++ &weak->src); ++ if (!src_txn_row) { ++ /* Source row is also removed. */ ++ continue; + } ++ ovs_assert(src_txn_row); ++ ovs_assert(ovs_list_is_empty(&weak->src_node)); ++ ovs_list_insert(&src_txn_row->deleted_refs, &weak->src_node); + } + } + + if (!txn_row->new) { +- /* We don't have to do anything about references that originate at +- * 'txn_row', because ovsdb_row_destroy() will remove those weak +- * references. */ ++ /* Since all the atoms will be destroyed by the ovsdb_row_destroy(), ++ * there is no need to check them here. Source references queued ++ * into 'deleted_ref' while removing other rows will be cleaned up at ++ * commit time. */ + return NULL; + } + +@@ -553,50 +634,94 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) + SHASH_FOR_EACH (node, &table->schema->columns) { + const struct ovsdb_column *column = node->data; + struct ovsdb_datum *datum = &txn_row->new->fields[column->index]; ++ struct ovsdb_datum added, removed, deleted_refs; + unsigned int orig_n, i; + bool zero = false; + + orig_n = datum->n; + ++ /* Collecting all key-value pairs that references deleted rows. */ ++ ovsdb_datum_init_empty(&deleted_refs); ++ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { ++ if (column->index == weak->column_idx) { ++ ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value, ++ &column->type, NULL); ++ ovs_list_remove(&weak->src_node); ++ ovs_list_init(&weak->src_node); ++ } ++ } ++ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type, ++ column->type.value.type); ++ ++ /* Removing elements that references deleted rows. */ ++ ovsdb_datum_subtract(datum, &column->type, ++ &deleted_refs, &column->type); ++ ovsdb_datum_destroy(&deleted_refs, &column->type); ++ ++ /* Generating the difference between old and new data. */ ++ if (txn_row->old) { ++ ovsdb_datum_added_removed(&added, &removed, ++ &txn_row->old->fields[column->index], ++ datum, &column->type); ++ } else { ++ ovsdb_datum_init_empty(&removed); ++ ovsdb_datum_clone(&added, datum, &column->type); ++ } ++ ++ /* Checking added data and creating new references. */ ++ ovsdb_datum_init_empty(&deleted_refs); + if (ovsdb_base_type_is_weak_ref(&column->type.key)) { +- for (i = 0; i < datum->n; ) { +- const struct ovsdb_row *row; +- +- row = ovsdb_table_get_row(column->type.key.uuid.refTable, +- &datum->keys[i].uuid); +- if (row) { +- add_weak_ref(txn_row->new, row); +- i++; +- } else { +- if (uuid_is_zero(&datum->keys[i].uuid)) { +- zero = true; +- } +- ovsdb_datum_remove_unsafe(datum, i, &column->type); +- } ++ for (i = 0; i < added.n; i++) { ++ find_and_add_weak_ref(txn_row, &added.keys[i], ++ added.values ? &added.values[i] : NULL, ++ column, true, &txn_row->added_refs, ++ &deleted_refs, &zero); + } + } + + if (ovsdb_base_type_is_weak_ref(&column->type.value)) { +- for (i = 0; i < datum->n; ) { +- const struct ovsdb_row *row; +- +- row = ovsdb_table_get_row(column->type.value.uuid.refTable, +- &datum->values[i].uuid); +- if (row) { +- add_weak_ref(txn_row->new, row); +- i++; +- } else { +- if (uuid_is_zero(&datum->values[i].uuid)) { +- zero = true; +- } +- ovsdb_datum_remove_unsafe(datum, i, &column->type); +- } ++ for (i = 0; i < added.n; i++) { ++ find_and_add_weak_ref(txn_row, &added.keys[i], ++ &added.values[i], ++ column, false, &txn_row->added_refs, ++ &deleted_refs, &zero); ++ } ++ } ++ if (deleted_refs.n) { ++ /* Removing all the references that doesn't point to valid rows. */ ++ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type, ++ column->type.value.type); ++ ovsdb_datum_subtract(datum, &column->type, ++ &deleted_refs, &column->type); ++ ovsdb_datum_destroy(&deleted_refs, &column->type); ++ } ++ ovsdb_datum_destroy(&added, &column->type); ++ ++ /* Creating refs that needs to be removed on commit. This includes ++ * both: the references that got directly removed from the datum and ++ * references removed due to deletion of a referenced row. */ ++ if (ovsdb_base_type_is_weak_ref(&column->type.key)) { ++ for (i = 0; i < removed.n; i++) { ++ find_and_add_weak_ref(txn_row, &removed.keys[i], ++ removed.values ++ ? &removed.values[i] : NULL, ++ column, true, &txn_row->deleted_refs, ++ NULL, NULL); + } + } + ++ if (ovsdb_base_type_is_weak_ref(&column->type.value)) { ++ for (i = 0; i < removed.n; i++) { ++ find_and_add_weak_ref(txn_row, &removed.keys[i], ++ &removed.values[i], ++ column, false, &txn_row->deleted_refs, ++ NULL, NULL); ++ } ++ } ++ ovsdb_datum_destroy(&removed, &column->type); ++ + if (datum->n != orig_n) { + bitmap_set1(txn_row->changed, column->index); +- ovsdb_datum_sort_assert(datum, column->type.key.type); + if (datum->n < column->type.n_min) { + const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new); + if (zero && !txn_row->old) { +@@ -817,6 +942,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED, + return NULL; + } + ++static struct ovsdb_error * OVS_WARN_UNUSED_RESULT ++count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) ++{ ++ struct ovsdb_table *table = txn_row->table; ++ ssize_t n_atoms_old = 0, n_atoms_new = 0; ++ struct shash_node *node; ++ ++ SHASH_FOR_EACH (node, &table->schema->columns) { ++ const struct ovsdb_column *column = node->data; ++ const struct ovsdb_type *type = &column->type; ++ unsigned int idx = column->index; ++ ++ if (txn_row->old) { ++ n_atoms_old += txn_row->old->fields[idx].n; ++ if (type->value.type != OVSDB_TYPE_VOID) { ++ n_atoms_old += txn_row->old->fields[idx].n; ++ } ++ } ++ if (txn_row->new) { ++ n_atoms_new += txn_row->new->fields[idx].n; ++ if (type->value.type != OVSDB_TYPE_VOID) { ++ n_atoms_new += txn_row->new->fields[idx].n; ++ } ++ } ++ } ++ ++ txn->n_atoms += n_atoms_old + n_atoms_new; ++ txn->n_atoms_diff += n_atoms_new - n_atoms_old; ++ return NULL; ++} ++ + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT + update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) + { +@@ -885,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn) + return error; + } + ++ /* Count atoms. */ ++ error = for_each_txn_row(txn, count_atoms); ++ if (error) { ++ return OVSDB_WRAP_BUG("can't happen", error); ++ } ++ + /* Update _version for rows that changed. */ + error = for_each_txn_row(txn, update_version); + if (error) { +@@ -900,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn) + struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned); + ovs_list_init(&txn_cloned->txn_tables); + txn_cloned->txnid = txn->txnid; ++ txn_cloned->n_atoms = txn->n_atoms; ++ txn_cloned->n_atoms_diff = txn->n_atoms_diff; + + struct ovsdb_txn_table *t; + LIST_FOR_EACH (t, node, &txn->txn_tables) { +@@ -958,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn) + node->txn = ovsdb_txn_clone(txn); + ovs_list_push_back(&txn->db->txn_history, &node->node); + txn->db->n_txn_history++; ++ txn->db->n_txn_history_atoms += txn->n_atoms; + } + } + +@@ -968,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn) + if (!ovsdb_txn_is_empty(txn)) { + + txn->db->run_triggers_now = txn->db->run_triggers = true; ++ txn->db->n_atoms += txn->n_atoms_diff; + ovsdb_monitors_commit(txn->db, txn); + ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs)); + ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit)); +@@ -1215,6 +1381,9 @@ ovsdb_txn_row_create(struct ovsdb_txn *txn, struct ovsdb_table *table, + txn_row->n_refs = old ? old->n_refs : 0; + txn_row->serial = serial - 1; + ++ ovs_list_init(&txn_row->added_refs); ++ ovs_list_init(&txn_row->deleted_refs); ++ + if (old) { + old->txn_row = txn_row; + } +@@ -1423,12 +1592,18 @@ ovsdb_txn_history_run(struct ovsdb *db) + if (!db->need_txn_history) { + return; + } +- /* Remove old histories to limit the size of the history */ +- while (db->n_txn_history > 100) { ++ /* Remove old histories to limit the size of the history. Removing until ++ * the number of ovsdb atoms in history becomes less than the number of ++ * atoms in the database, because it will be faster to just get a database ++ * snapshot than re-constructing changes from the history that big. */ ++ while (db->n_txn_history && ++ (db->n_txn_history > 100 || ++ db->n_txn_history_atoms > db->n_atoms)) { + struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF( + ovs_list_pop_front(&db->txn_history), + struct ovsdb_txn_history_node, node); + ++ db->n_txn_history_atoms -= txn_h_node->txn->n_atoms; + ovsdb_txn_destroy_cloned(txn_h_node->txn); + free(txn_h_node); + db->n_txn_history--; +@@ -1440,6 +1615,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history) + { + db->need_txn_history = need_txn_history; + db->n_txn_history = 0; ++ db->n_txn_history_atoms = 0; + ovs_list_init(&db->txn_history); + } + +@@ -1458,4 +1634,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db) + free(txn_h_node); + } + db->n_txn_history = 0; ++ db->n_txn_history_atoms = 0; + } diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py index 2a2102d6be..99bf80ed62 100644 --- a/python/ovs/db/data.py @@ -3122,6 +3821,80 @@ index 8cd2a26cb3..25c6acdac6 100644 OK]]) OVSDB_CHECK_NEGATIVE([generate and apply diff with map -- size error], +diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at +index ac243d6a79..2b742f78b0 100644 +--- a/tests/ovsdb-server.at ++++ b/tests/ovsdb-server.at +@@ -1228,6 +1228,69 @@ AT_CHECK([test $logged_updates -lt $logged_nonblock_updates]) + AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0], + [xyzzy$counter + ]) ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++AT_CLEANUP ++ ++AT_SETUP([ovsdb-server transaction history size]) ++on_exit 'kill `cat *.pid`' ++ ++dnl Start an ovsdb-server with the clustered vswitchd schema. ++AT_CHECK([ovsdb-tool create-cluster db dnl ++ $abs_top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft], ++ [0], [ignore], [ignore]) ++AT_CHECK([ovsdb-server --detach --no-chdir --pidfile dnl ++ --log-file --remote=punix:db.sock db], ++ [0], [ignore], [ignore]) ++AT_CHECK([ovs-vsctl --no-wait init]) ++ ++dnl Create a bridge with N ports per transaction. Increase N every 4 ++dnl iterations. And then remove the bridges. By increasing the size of ++dnl transactions, ensuring that they take up a significant percentage of ++dnl the total database size, so the transaction history will not be able ++dnl to hold all of them. ++dnl ++dnl The test verifies that the number of atoms in the transaction history ++dnl is always less than the number of atoms in the database. ++get_n_atoms () { ++ n=$(ovs-appctl -t ovsdb-server memory/show dnl ++ | tr ' ' '\n' | grep atoms | grep "^$1:" | cut -d ':' -f 2) ++ if test X"$n" == "X"; then ++ n=0 ++ fi ++ echo $n ++} ++ ++check_atoms () { ++ n_db_atoms=$(get_n_atoms atoms) ++ n_txn_history_atoms=$(get_n_atoms txn-history-atoms) ++ echo "n_db_atoms: $n_db_atoms" ++ echo "n_txn_history_atoms: $n_txn_history_atoms" ++ AT_CHECK([test $n_txn_history_atoms -le $n_db_atoms]) ++} ++ ++add_ports () { ++ for j in $(seq 1 $2); do ++ printf " -- add-port br$1 p$1-%d" $j ++ done ++} ++ ++initial_db_atoms=$(get_n_atoms atoms) ++ ++for i in $(seq 1 100); do ++ cmd=$(add_ports $i $(($i / 4 + 1))) ++ AT_CHECK([ovs-vsctl --no-wait add-br br$i $cmd]) ++ check_atoms ++done ++ ++for i in $(seq 1 100); do ++ AT_CHECK([ovs-vsctl --no-wait del-br br$i]) ++ check_atoms ++done ++ ++dnl After removing all the bridges, the number of atoms in the database ++dnl should return to its initial value. ++AT_CHECK([test $(get_n_atoms atoms) -eq $initial_db_atoms]) ++ + OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + AT_CLEANUP + diff --git a/tests/system-traffic.at b/tests/system-traffic.at index f400cfabc9..092de308be 100644 --- a/tests/system-traffic.at diff --git a/SPECS/openvswitch2.16.spec b/SPECS/openvswitch2.16.spec index 7b89691..2856c4b 100644 --- a/SPECS/openvswitch2.16.spec +++ b/SPECS/openvswitch2.16.spec @@ -57,7 +57,7 @@ Summary: Open vSwitch Group: System Environment/Daemons daemon/database/utilities URL: http://www.openvswitch.org/ Version: 2.16.0 -Release: 25%{?dist} +Release: 27%{?dist} # Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the # lib/sflow*.[ch] files are SISSL @@ -700,6 +700,113 @@ exit 0 %endif %changelog +* Tue Nov 09 2021 Ilya Maximets - 2.16.0-27 +- ovsdb: Don't let transaction history grow larger than the database. [RH git: 93d1fa0bdf] (#2012949) + commit 317b1bfd7dd315e241c158e6d4095002ff391ee3 + Author: Ilya Maximets + Date: Tue Sep 28 13:17:21 2021 +0200 + + ovsdb: Don't let transaction history grow larger than the database. + + If user frequently changes a lot of rows in a database, transaction + history could grow way larger than the database itself. This wastes + a lot of memory and also makes monitor_cond_since slower than + usual monotor_cond if the transaction id is old enough, because + re-construction of the changes from a history is slower than just + creation of initial database snapshot. This is also the case if + user deleted a lot of data, so transaction history still holds all of + it while the database itself doesn't. + + In case of current lb-per-service model in ovn-kubernetes, each + load-balancer is added to every logical switch/router. Such a + transaction touches more than a half of a OVN_Northbound database. + And each of these transactions is added to the transaction history. + Since transaction history depth is 100, in worst case scenario, + it will hold 100 copies of a database increasing memory consumption + dramatically. In tests with 3000 LBs and 120 LSs, memory goes up + to 3 GB, while holding at 30 MB if transaction history disabled in + the code. + + Fixing that by keeping count of the number of ovsdb_atom's in the + database and not allowing the total number of atoms in transaction + history to grow larger than this value. Counting atoms is fairly + cheap because we don't need to iterate over them, so it doesn't have + significant performance impact. It would be ideal to measure the + size of individual atoms, but that will hit the performance. + Counting cells instead of atoms is not sufficient, because OVN + users are adding hundreds or thousands of atoms to a single cell, + so they are largely different in size. + + Signed-off-by: Ilya Maximets + Acked-by: Han Zhou + Acked-by: Dumitru Ceara + + Reported-at: https://bugzilla.redhat.com/2012949 + Signed-off-by: Ilya Maximets + + +* Tue Nov 09 2021 Ilya Maximets - 2.16.0-26 +- ovsdb: transaction: Incremental reassessment of weak refs. [RH git: e8a363db49] (#2005958) + commit 4dbff9f0a68579241ac1a040726be3906afb8fe9 + Author: Ilya Maximets + Date: Sat Oct 16 03:20:23 2021 +0200 + + ovsdb: transaction: Incremental reassessment of weak refs. + + The main idea is to not store list of weak references in the source + row, so they all don't need to be re-checked/updated on every + modification of that source row. The point is that source row already + knows UUIDs of all destination rows stored in the data, so there is no + much profit in storing this information somewhere else. If needed, + destination row can be looked up and reference can be looked up in the + destination row. For the fast lookup, destination row now stores + references in a hash map. + + Weak reference structure now contains the table and uuid of a source + row instead of a direct pointer. This allows to replace/update the + source row without breaking any weak references stored in destination + rows. + + Structure also now contains the key-value pair of atoms that triggered + creation of this reference. These atoms can be used to quickly + subtract removed references from a source row. During reassessment, + ovsdb now only needs to care about new added or removed atoms, and + atoms that got removed due to removal of the destination rows, but + these are marked for reassessment by the destination row. + + ovsdb_datum_subtract() is used to remove atoms that points to removed + or incorrect rows, so there is no need to re-sort datum in the end. + + Results of an OVN load-balancer benchmark that adds 3K load-balancers + to each of 120 logical switches and 120 logical routers in the OVN + sandbox with clustered Northbound database and then removes them: + + Before: + + %CPU CPU Time CMD + 86.8 00:16:05 ovsdb-server nb1.db + 44.1 00:08:11 ovsdb-server nb2.db + 43.2 00:08:00 ovsdb-server nb3.db + + After: + + %CPU CPU Time CMD + 54.9 00:02:58 ovsdb-server nb1.db + 33.3 00:01:48 ovsdb-server nb2.db + 32.2 00:01:44 ovsdb-server nb3.db + + So, on a cluster leader the processing time dropped by 5.4x, on + followers - by 4.5x. More load-balancers - larger the performance + difference. There is a slight increase of memory usage, because new + reference structure is larger, but the difference is not significant. + + Signed-off-by: Ilya Maximets + Acked-by: Dumitru Ceara + + Reported-at: https://bugzilla.redhat.com/2005958 + Signed-off-by: Ilya Maximets + + * Thu Oct 28 2021 Open vSwitch CI - 2.16.0-25 - Merging upstream branch-2.16 [RH git: f5366890c5] Commit list: