773311
From 966dd7bc7fb7931d7616ca886dc24ecb1f34cced Mon Sep 17 00:00:00 2001
773311
From: Dumitru Ceara <dceara@redhat.com>
773311
Date: Thu, 28 May 2020 14:32:31 +0200
773311
Subject: [PATCH] ovsdb-idl: Avoid inconsistent IDL state with
773311
 OVSDB_MONITOR_V3.
773311
773311
Assuming an ovsdb client connected to a database using OVSDB_MONITOR_V3
773311
(i.e., "monitor_cond_since" method) with the initial monitor condition
773311
MC1.
773311
773311
Assuming the following two transactions are executed on the
773311
ovsdb-server:
773311
TXN1: "insert record R1 in table T1"
773311
TXN2: "insert record R2 in table T2"
773311
773311
If the client's monitor condition MC1 for table T2 matches R2 then the
773311
client will receive the following update3 message:
773311
method="update3", "insert record R2 in table T2", last-txn-id=TXN2
773311
773311
At this point, if the presence of the new record R2 in the IDL triggers
773311
the client to update its monitor condition to MC2 and add a clause for
773311
table T1 which matches R1, a monitor_cond_change message is sent to the
773311
server:
773311
method="monitor_cond_change", "clauses from MC2"
773311
773311
In normal operation the ovsdb-server will reply with a new update3
773311
message of the form:
773311
method="update3", "insert record R1 in table T1", last-txn-id=TXN2
773311
773311
However, if the connection drops in the meantime, this last update might
773311
get lost.
773311
773311
It might happen that during the reconnect a new transaction happens
773311
that modifies the original record R1:
773311
TXN3: "modify record R1 in table T1"
773311
773311
When the client reconnects, it will try to perform a fast resync by
773311
sending:
773311
method="monitor_cond_since", "clauses from MC2", last-txn-id=TXN2
773311
773311
Because TXN2 is still in the ovsdb-server transaction history, the
773311
server replies with the changes from the most recent transactions only,
773311
i.e., TXN3:
773311
result="true", last-txbb-id=TXN3, "modify record R1 in table T1"
773311
773311
This causes the IDL on the client in to end up in an inconsistent
773311
state because it has never seen the update that created R1.
773311
773311
Such a scenario is described in:
773311
https://bugzilla.redhat.com/show_bug.cgi?id=1808580#c22
773311
773311
To avoid this issue, the IDL will now maintain (up to) 3 different
773311
types of conditions for each DB table:
773311
- new_cond: condition that has been set by the IDL client but has
773311
  not yet been sent to the server through monitor_cond_change.
773311
- req_cond: condition that has been sent to the server but the reply
773311
  acknowledging the change hasn't been received yet.
773311
- ack_cond: condition that has been acknowledged by the server.
773311
773311
Whenever the IDL FSM is restarted (e.g., voluntary or involuntary
773311
disconnect):
773311
- if there is a known last_id txn-id the code ensures that new_cond
773311
  will contain the most recent condition set by the IDL client
773311
  (either req_cond if there was a request in flight, or new_cond
773311
  if the IDL client set a condition while the IDL was disconnected)
773311
- if there is no known last_id txn-id the code ensures that ack_cond will
773311
  contain the most recent conditions set by the IDL client regardless
773311
  whether they were acked by the server or not.
773311
773311
When monitor_cond_since/monitor_cond requests are sent they will
773311
always include ack_cond and if new_cond is not NULL a follow up
773311
monitor_cond_change will be generated afterwards.
773311
773311
On the other hand ovsdb_idl_db_set_condition() will always modify new_cond.
773311
773311
This ensures that updates of type "insert" that happened before the last
773311
transaction known by the IDL but didn't match old monitor conditions are
773311
sent upon reconnect if the monitor condition has changed to include them
773311
in the meantime.
773311
773311
Fixes: 403a6a0cb003 ("ovsdb-idl: Fast resync from server when connection reset.")
773311
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
773311
Acked-by: Han Zhou <hzhou@ovn.org>
773311
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
773311
(cherry picked from upstream OVS commit ae25f8c8fff80a58cd0a15e2d3ae7ab1b4994e48)
773311
773311
Change-Id: I4f3cd43cf69dfe76eb65c9709b759e5062c29e89
773311
---
773311
 openvswitch-2.13.0/lib/ovsdb-idl-provider.h |   8 +-
773311
 openvswitch-2.13.0/lib/ovsdb-idl.c          | 167 ++++++++++++++++++++++++----
773311
 openvswitch-2.13.0/tests/ovsdb-idl.at       |  56 ++++++++++
773311
 3 files changed, 206 insertions(+), 25 deletions(-)
773311
773311
diff --git a/openvswitch-2.13.0/lib/ovsdb-idl-provider.h b/openvswitch-2.13.0/lib/ovsdb-idl-provider.h
773311
index 30d1d08..00497d9 100644
773311
--- a/openvswitch-2.13.0/lib/ovsdb-idl-provider.h
773311
+++ b/openvswitch-2.13.0/lib/ovsdb-idl-provider.h
773311
@@ -122,8 +122,12 @@ struct ovsdb_idl_table {
773311
     unsigned int change_seqno[OVSDB_IDL_CHANGE_MAX];
773311
     struct ovs_list indexes;    /* Contains "struct ovsdb_idl_index"s */
773311
     struct ovs_list track_list; /* Tracked rows (ovsdb_idl_row.track_node). */
773311
-    struct ovsdb_idl_condition condition;
773311
-    bool cond_changed;
773311
+    struct ovsdb_idl_condition *ack_cond; /* Last condition acked by the
773311
+                                           * server. */
773311
+    struct ovsdb_idl_condition *req_cond; /* Last condition requested to the
773311
+                                           * server. */
773311
+    struct ovsdb_idl_condition *new_cond; /* Latest condition set by the IDL
773311
+                                           * client. */
773311
 };
773311
 
773311
 struct ovsdb_idl_class {
773311
diff --git a/openvswitch-2.13.0/lib/ovsdb-idl.c b/openvswitch-2.13.0/lib/ovsdb-idl.c
773311
index 2d35179..8eb4213 100644
773311
--- a/openvswitch-2.13.0/lib/ovsdb-idl.c
773311
+++ b/openvswitch-2.13.0/lib/ovsdb-idl.c
773311
@@ -240,6 +240,10 @@ static void ovsdb_idl_send_monitor_request(struct ovsdb_idl *,
773311
                                            struct ovsdb_idl_db *,
773311
                                            enum ovsdb_idl_monitor_method);
773311
 static void ovsdb_idl_db_clear(struct ovsdb_idl_db *db);
773311
+static void ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db);
773311
+static void ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db);
773311
+static void ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst,
773311
+                                     struct ovsdb_idl_condition **src);
773311
 
773311
 struct ovsdb_idl {
773311
     struct ovsdb_idl_db server;
773311
@@ -424,9 +428,11 @@ ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class,
773311
             = table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
773311
             = table->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
773311
         table->db = db;
773311
-        ovsdb_idl_condition_init(&table->condition);
773311
-        ovsdb_idl_condition_add_clause_true(&table->condition);
773311
-        table->cond_changed = false;
773311
+        table->ack_cond = NULL;
773311
+        table->req_cond = NULL;
773311
+        table->new_cond = xmalloc(sizeof *table->new_cond);
773311
+        ovsdb_idl_condition_init(table->new_cond);
773311
+        ovsdb_idl_condition_add_clause_true(table->new_cond);
773311
     }
773311
     db->monitor_id = json_array_create_2(json_string_create("monid"),
773311
                                          json_string_create(class->database));
773311
@@ -558,12 +564,15 @@ ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *idl, bool shuffle)
773311
 static void
773311
 ovsdb_idl_db_destroy(struct ovsdb_idl_db *db)
773311
 {
773311
+    struct ovsdb_idl_condition *null_cond = NULL;
773311
     ovs_assert(!db->txn);
773311
     ovsdb_idl_db_txn_abort_all(db);
773311
     ovsdb_idl_db_clear(db);
773311
     for (size_t i = 0; i < db->class_->n_tables; i++) {
773311
         struct ovsdb_idl_table *table = &db->tables[i];
773311
-        ovsdb_idl_condition_destroy(&table->condition);
773311
+        ovsdb_idl_condition_move(&table->ack_cond, &null_cond);
773311
+        ovsdb_idl_condition_move(&table->req_cond, &null_cond);
773311
+        ovsdb_idl_condition_move(&table->new_cond, &null_cond);
773311
         ovsdb_idl_destroy_indexes(table);
773311
         shash_destroy(&table->columns);
773311
         hmap_destroy(&table->rows);
773311
@@ -692,6 +701,12 @@ ovsdb_idl_send_request(struct ovsdb_idl *idl, struct jsonrpc_msg *request)
773311
 static void
773311
 ovsdb_idl_restart_fsm(struct ovsdb_idl *idl)
773311
 {
773311
+    /* Resync data DB table conditions to avoid missing updates due to
773311
+     * conditions that were in flight or changed locally while the connection
773311
+     * was down.
773311
+     */
773311
+    ovsdb_idl_db_sync_condition(&idl->data);
773311
+
773311
     ovsdb_idl_send_schema_request(idl, &idl->server);
773311
     ovsdb_idl_transition(idl, IDL_S_SERVER_SCHEMA_REQUESTED);
773311
     idl->data.monitoring = OVSDB_IDL_NOT_MONITORING;
773311
@@ -799,7 +814,9 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg)
773311
          * do, it's a "monitor_cond_change", which means that the conditional
773311
          * monitor clauses were updated.
773311
          *
773311
-         * If further condition changes were pending, send them now. */
773311
+         * Mark the last requested conditions as acked and if further
773311
+         * condition changes were pending, send them now. */
773311
+        ovsdb_idl_db_ack_condition(&idl->data);
773311
         ovsdb_idl_send_cond_change(idl);
773311
         idl->data.cond_seqno++;
773311
         break;
773311
@@ -1495,30 +1512,60 @@ ovsdb_idl_condition_equals(const struct ovsdb_idl_condition *a,
773311
 }
773311
 
773311
 static void
773311
-ovsdb_idl_condition_clone(struct ovsdb_idl_condition *dst,
773311
+ovsdb_idl_condition_clone(struct ovsdb_idl_condition **dst,
773311
                           const struct ovsdb_idl_condition *src)
773311
 {
773311
-    ovsdb_idl_condition_init(dst);
773311
+    if (*dst) {
773311
+        ovsdb_idl_condition_destroy(*dst);
773311
+    } else {
773311
+        *dst = xmalloc(sizeof **dst);
773311
+    }
773311
+    ovsdb_idl_condition_init(*dst);
773311
 
773311
-    dst->is_true = src->is_true;
773311
+    (*dst)->is_true = src->is_true;
773311
 
773311
     const struct ovsdb_idl_clause *clause;
773311
     HMAP_FOR_EACH (clause, hmap_node, &src->clauses) {
773311
-        ovsdb_idl_condition_add_clause__(dst, clause, clause->hmap_node.hash);
773311
+        ovsdb_idl_condition_add_clause__(*dst, clause, clause->hmap_node.hash);
773311
     }
773311
 }
773311
 
773311
+static void
773311
+ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst,
773311
+                         struct ovsdb_idl_condition **src)
773311
+{
773311
+    if (*dst) {
773311
+        ovsdb_idl_condition_destroy(*dst);
773311
+        free(*dst);
773311
+    }
773311
+    *dst = *src;
773311
+    *src = NULL;
773311
+}
773311
+
773311
 static unsigned int
773311
 ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db,
773311
                            const struct ovsdb_idl_table_class *tc,
773311
                            const struct ovsdb_idl_condition *condition)
773311
 {
773311
+    struct ovsdb_idl_condition *table_cond;
773311
     struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc);
773311
     unsigned int seqno = db->cond_seqno;
773311
-    if (!ovsdb_idl_condition_equals(condition, &table->condition)) {
773311
-        ovsdb_idl_condition_destroy(&table->condition);
773311
-        ovsdb_idl_condition_clone(&table->condition, condition);
773311
-        db->cond_changed = table->cond_changed = true;
773311
+
773311
+    /* Compare the new condition to the last known condition which can be
773311
+     * either "new" (not sent yet), "requested" or "acked", in this order.
773311
+     */
773311
+    if (table->new_cond) {
773311
+        table_cond = table->new_cond;
773311
+    } else if (table->req_cond) {
773311
+        table_cond = table->req_cond;
773311
+    } else {
773311
+        table_cond = table->ack_cond;
773311
+    }
773311
+    ovs_assert(table_cond);
773311
+
773311
+    if (!ovsdb_idl_condition_equals(condition, table_cond)) {
773311
+        ovsdb_idl_condition_clone(&table->new_cond, condition);
773311
+        db->cond_changed = true;
773311
         poll_immediate_wake();
773311
         return seqno + 1;
773311
     }
773311
@@ -1563,9 +1610,8 @@ ovsdb_idl_condition_to_json(const struct ovsdb_idl_condition *cnd)
773311
 }
773311
 
773311
 static struct json *
773311
-ovsdb_idl_create_cond_change_req(struct ovsdb_idl_table *table)
773311
+ovsdb_idl_create_cond_change_req(const struct ovsdb_idl_condition *cond)
773311
 {
773311
-    const struct ovsdb_idl_condition *cond = &table->condition;
773311
     struct json *monitor_cond_change_request = json_object_create();
773311
     struct json *cond_json = ovsdb_idl_condition_to_json(cond);
773311
 
773311
@@ -1585,8 +1631,12 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
773311
     for (size_t i = 0; i < db->class_->n_tables; i++) {
773311
         struct ovsdb_idl_table *table = &db->tables[i];
773311
 
773311
-        if (table->cond_changed) {
773311
-            struct json *req = ovsdb_idl_create_cond_change_req(table);
773311
+        /* Always use the most recent conditions set by the IDL client when
773311
+         * requesting monitor_cond_change, i.e., table->new_cond.
773311
+         */
773311
+        if (table->new_cond) {
773311
+            struct json *req =
773311
+                ovsdb_idl_create_cond_change_req(table->new_cond);
773311
             if (req) {
773311
                 if (!monitor_cond_change_requests) {
773311
                     monitor_cond_change_requests = json_object_create();
773311
@@ -1595,7 +1645,11 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
773311
                              table->class_->name,
773311
                              json_array_create_1(req));
773311
             }
773311
-            table->cond_changed = false;
773311
+            /* Mark the new condition as requested by moving it to req_cond.
773311
+             * If there's already requested condition that's a bug.
773311
+             */
773311
+            ovs_assert(table->req_cond == NULL);
773311
+            ovsdb_idl_condition_move(&table->req_cond, &table->new_cond);
773311
         }
773311
     }
773311
 
773311
@@ -1610,6 +1664,73 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
773311
     return jsonrpc_create_request("monitor_cond_change", params, NULL);
773311
 }
773311
 
773311
+/* Marks all requested table conditions in 'db' as acked by the server.
773311
+ * It should be called when the server replies to monitor_cond_change
773311
+ * requests.
773311
+ */
773311
+static void
773311
+ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db)
773311
+{
773311
+    for (size_t i = 0; i < db->class_->n_tables; i++) {
773311
+        struct ovsdb_idl_table *table = &db->tables[i];
773311
+
773311
+        if (table->req_cond) {
773311
+            ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond);
773311
+        }
773311
+    }
773311
+}
773311
+
773311
+/* Should be called when the IDL fsm is restarted and resyncs table conditions
773311
+ * based on the state the DB is in:
773311
+ * - if a non-zero last_id is available for the DB then upon reconnect
773311
+ *   the IDL should first request acked conditions to avoid missing updates
773311
+ *   about records that were added before the transaction with
773311
+ *   txn-id == last_id. If there were requested condition changes in flight
773311
+ *   (i.e., req_cond not NULL) and the IDL client didn't set new conditions
773311
+ *   (i.e., new_cond is NULL) then move req_cond to new_cond to trigger a
773311
+ *   follow up monitor_cond_change request.
773311
+ * - if there's no last_id available for the DB then it's safe to use the
773311
+ *   latest conditions set by the IDL client even if they weren't acked yet.
773311
+ */
773311
+static void
773311
+ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db)
773311
+{
773311
+    bool ack_all = uuid_is_zero(&db->last_id);
773311
+
773311
+    db->cond_changed = false;
773311
+    for (size_t i = 0; i < db->class_->n_tables; i++) {
773311
+        struct ovsdb_idl_table *table = &db->tables[i];
773311
+
773311
+        /* When monitor_cond_since requests will be issued, the
773311
+         * table->ack_cond condition will be added to the "where" clause".
773311
+         * Follow up monitor_cond_change requests will use table->new_cond.
773311
+         */
773311
+        if (ack_all) {
773311
+            if (table->new_cond) {
773311
+                ovsdb_idl_condition_move(&table->req_cond, &table->new_cond);
773311
+            }
773311
+
773311
+            if (table->req_cond) {
773311
+                ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond);
773311
+            }
773311
+        } else {
773311
+            /* If there was no "unsent" condition but instead a
773311
+             * monitor_cond_change request was in flight, move table->req_cond
773311
+             * to table->new_cond and set db->cond_changed to trigger a new
773311
+             * monitor_cond_change request.
773311
+             *
773311
+             * However, if a new condition has been set by the IDL client,
773311
+             * monitor_cond_change will be sent anyway and will use the most
773311
+             * recent table->new_cond so there's no need to update it here.
773311
+             */
773311
+            if (table->req_cond && !table->new_cond) {
773311
+                ovsdb_idl_condition_move(&table->new_cond, &table->req_cond);
773311
+                db->cond_changed = true;
773311
+            }
773311
+        }
773311
+    }
773311
+}
773311
+
773311
 static void
773311
 ovsdb_idl_send_cond_change(struct ovsdb_idl *idl)
773311
 {
773311
@@ -2064,13 +2185,15 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db,
773311
             monitor_request = json_object_create();
773311
             json_object_put(monitor_request, "columns", columns);
773311
 
773311
-            const struct ovsdb_idl_condition *cond = &table->condition;
773311
+            /* Always use acked conditions when requesting
773311
+             * monitor_cond/monitor_cond_since.
773311
+             */
773311
+            const struct ovsdb_idl_condition *cond = table->ack_cond;
773311
             if ((monitor_method == OVSDB_IDL_MM_MONITOR_COND ||
773311
                  monitor_method == OVSDB_IDL_MM_MONITOR_COND_SINCE) &&
773311
-                !ovsdb_idl_condition_is_true(cond)) {
773311
+                cond && !ovsdb_idl_condition_is_true(cond)) {
773311
                 json_object_put(monitor_request, "where",
773311
                                 ovsdb_idl_condition_to_json(cond));
773311
-                table->cond_changed = false;
773311
             }
773311
             json_object_put(monitor_requests, tc->name,
773311
                             json_array_create_1(monitor_request));
773311
@@ -2078,8 +2201,6 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db,
773311
     }
773311
     free_schema(schema);
773311
 
773311
-    db->cond_changed = false;
773311
-
773311
     struct json *params = json_array_create_3(
773311
                               json_string_create(db->class_->database),
773311
                               json_clone(db->monitor_id),
773311
diff --git a/openvswitch-2.13.0/tests/ovsdb-idl.at b/openvswitch-2.13.0/tests/ovsdb-idl.at
773311
index cc38d69..a5ca966 100644
773311
--- a/openvswitch-2.13.0/tests/ovsdb-idl.at
773311
+++ b/openvswitch-2.13.0/tests/ovsdb-idl.at
773311
@@ -1814,3 +1814,59 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY],
773311
 
773311
 OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote'])
773311
 OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL reconnects to leader], 3, ['remote' '+remotestop' 'remote'])
773311
+
773311
+# same as OVSDB_CHECK_IDL but uses C IDL implementation with tcp
773311
+# with multiple remotes.
773311
+m4_define([OVSDB_CHECK_CLUSTER_IDL_C],
773311
+  [AT_SETUP([$1 - C - tcp])
773311
+   AT_KEYWORDS([ovsdb server idl positive tcp socket $5])
773311
+   m4_define([LPBK],[127.0.0.1])
773311
+   AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK])
773311
+   PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1])
773311
+   PARSE_LISTENING_PORT([s2.log], [TCP_PORT_2])
773311
+   PARSE_LISTENING_PORT([s3.log], [TCP_PORT_3])
773311
+   remotes=tcp:LPBK:$TCP_PORT_1,tcp:LPBK:$TCP_PORT_2,tcp:LPBK:$TCP_PORT_3
773311
+
773311
+   m4_if([$3], [], [],
773311
+     [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])])
773311
+   AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl tcp:LPBK:$TCP_PORT_1 $4],
773311
+            [0], [stdout], [ignore])
773311
+   AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]),
773311
+            [0], [$5])
773311
+   AT_CLEANUP])
773311
+
773311
+# Checks that monitor_cond_since works fine when disconnects happen
773311
+# with cond_change requests in flight (i.e., IDL is properly updated).
773311
+OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
773311
+  3,
773311
+  [['["idltest",
773311
+       {"op": "insert",
773311
+       "table": "simple",
773311
+       "row": {"i": 1,
773311
+               "r": 1.0,
773311
+               "b": true}},
773311
+       {"op": "insert",
773311
+       "table": "simple",
773311
+       "row": {"i": 2,
773311
+               "r": 1.0,
773311
+               "b": true}}]']],
773311
+  [['condition simple []' \
773311
+    'condition simple [["i","==",2]]' \
773311
+    'condition simple [["i","==",1]]' \
773311
+    '+reconnect' \
773311
+    '["idltest",
773311
+      {"op": "update",
773311
+       "table": "simple",
773311
+       "where": [["i", "==", 1]],
773311
+       "row": {"r": 2.0 }}]']],
773311
+  [[000: change conditions
773311
+001: empty
773311
+002: change conditions
773311
+003: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
773311
+004: change conditions
773311
+005: reconnect
773311
+006: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
773311
+007: {"error":null,"result":[{"count":1}]}
773311
+008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
773311
+009: done
773311
+]])
773311
-- 
773311
1.8.3.1
773311