Blob Blame History Raw
From 966dd7bc7fb7931d7616ca886dc24ecb1f34cced Mon Sep 17 00:00:00 2001
From: Dumitru Ceara <>
Date: Thu, 28 May 2020 14:32:31 +0200
Subject: [PATCH] ovsdb-idl: Avoid inconsistent IDL state with OVSDB_MONITOR_V3.

Assuming an ovsdb client connected to a database using OVSDB_MONITOR_V3
(i.e., "monitor_cond_since" method) with the initial monitor condition
MC1.

Assuming the following two transactions are executed on the
ovsdb-server:
TXN1: "insert record R1 in table T1"
TXN2: "insert record R2 in table T2"

If the client's monitor condition MC1 for table T2 matches R2 then the
client will receive the following update3 message:
method="update3", "insert record R2 in table T2", last-txn-id=TXN2

At this point, if the presence of the new record R2 in the IDL triggers
the client to update its monitor condition to MC2 and add a clause for
table T1 which matches R1, a monitor_cond_change message is sent to the
server:
method="monitor_cond_change", "clauses from MC2"

In normal operation the ovsdb-server will reply with a new update3
message of the form:
method="update3", "insert record R1 in table T1", last-txn-id=TXN2

However, if the connection drops in the meantime, this last update might
get lost.

It might happen that during the reconnect a new transaction happens
that modifies the original record R1:
TXN3: "modify record R1 in table T1"

When the client reconnects, it will try to perform a fast resync by
sending:
method="monitor_cond_since", "clauses from MC2", last-txn-id=TXN2

Because TXN2 is still in the ovsdb-server transaction history, the
server replies with the changes from the most recent transactions only,
i.e., TXN3:
result="true", last-txn-id=TXN3, "modify record R1 in table T1"

This causes the IDL on the client to end up in an inconsistent
state because it has never seen the update that created R1.

Such a scenario is described in:

To avoid this issue, the IDL will now maintain (up to) 3 different
types of conditions for each DB table:
- new_cond: condition that has been set by the IDL client but has
  not yet been sent to the server through monitor_cond_change.
- req_cond: condition that has been sent to the server but the reply
  acknowledging the change hasn't been received yet.
- ack_cond: condition that has been acknowledged by the server.

Whenever the IDL FSM is restarted (e.g., voluntary or involuntary
disconnect):
- if there is a known last_id txn-id the code ensures that new_cond
  will contain the most recent condition set by the IDL client
  (either req_cond if there was a request in flight, or new_cond
  if the IDL client set a condition while the IDL was disconnected)
- if there is no known last_id txn-id the code ensures that ack_cond will
  contain the most recent conditions set by the IDL client regardless
  of whether they were acked by the server or not.

When monitor_cond_since/monitor_cond requests are sent they will
always include ack_cond and if new_cond is not NULL a follow up
monitor_cond_change will be generated afterwards.

On the other hand ovsdb_idl_db_set_condition() will always modify new_cond.

This ensures that updates of type "insert" that happened before the last
transaction known by the IDL but didn't match old monitor conditions are
sent upon reconnect if the monitor condition has changed to include them
in the meantime.

Fixes: 403a6a0cb003 ("ovsdb-idl: Fast resync from server when connection reset.")
Signed-off-by: Dumitru Ceara <>
Acked-by: Han Zhou <>
Signed-off-by: Ilya Maximets <>
(cherry picked from upstream OVS commit ae25f8c8fff80a58cd0a15e2d3ae7ab1b4994e48)

Change-Id: I4f3cd43cf69dfe76eb65c9709b759e5062c29e89
 openvswitch-2.13.0/lib/ovsdb-idl-provider.h |   8 +-
 openvswitch-2.13.0/lib/ovsdb-idl.c          | 167 ++++++++++++++++++++++++----
 openvswitch-2.13.0/tests/       |  56 ++++++++++
 3 files changed, 206 insertions(+), 25 deletions(-)

diff --git a/openvswitch-2.13.0/lib/ovsdb-idl-provider.h b/openvswitch-2.13.0/lib/ovsdb-idl-provider.h
index 30d1d08..00497d9 100644
--- a/openvswitch-2.13.0/lib/ovsdb-idl-provider.h
+++ b/openvswitch-2.13.0/lib/ovsdb-idl-provider.h
@@ -122,8 +122,12 @@ struct ovsdb_idl_table {
     unsigned int change_seqno[OVSDB_IDL_CHANGE_MAX];
     struct ovs_list indexes;    /* Contains "struct ovsdb_idl_index"s */
     struct ovs_list track_list; /* Tracked rows (ovsdb_idl_row.track_node). */
-    struct ovsdb_idl_condition condition;
-    bool cond_changed;
+    struct ovsdb_idl_condition *ack_cond; /* Last condition acked by the
+                                           * server. */
+    struct ovsdb_idl_condition *req_cond; /* Last condition requested to the
+                                           * server. */
+    struct ovsdb_idl_condition *new_cond; /* Latest condition set by the IDL
+                                           * client. */
 struct ovsdb_idl_class {
diff --git a/openvswitch-2.13.0/lib/ovsdb-idl.c b/openvswitch-2.13.0/lib/ovsdb-idl.c
index 2d35179..8eb4213 100644
--- a/openvswitch-2.13.0/lib/ovsdb-idl.c
+++ b/openvswitch-2.13.0/lib/ovsdb-idl.c
@@ -240,6 +240,10 @@ static void ovsdb_idl_send_monitor_request(struct ovsdb_idl *,
                                            struct ovsdb_idl_db *,
                                            enum ovsdb_idl_monitor_method);
 static void ovsdb_idl_db_clear(struct ovsdb_idl_db *db);
+static void ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db);
+static void ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db);
+static void ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst,
+                                     struct ovsdb_idl_condition **src);
 struct ovsdb_idl {
     struct ovsdb_idl_db server;
@@ -424,9 +428,11 @@ ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class,
             = table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
             = table->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
         table->db = db;
-        ovsdb_idl_condition_init(&table->condition);
-        ovsdb_idl_condition_add_clause_true(&table->condition);
-        table->cond_changed = false;
+        table->ack_cond = NULL;
+        table->req_cond = NULL;
+        table->new_cond = xmalloc(sizeof *table->new_cond);
+        ovsdb_idl_condition_init(table->new_cond);
+        ovsdb_idl_condition_add_clause_true(table->new_cond);
     db->monitor_id = json_array_create_2(json_string_create("monid"),
@@ -558,12 +564,15 @@ ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *idl, bool shuffle)
 static void
 ovsdb_idl_db_destroy(struct ovsdb_idl_db *db)
+    struct ovsdb_idl_condition *null_cond = NULL;
     for (size_t i = 0; i < db->class_->n_tables; i++) {
         struct ovsdb_idl_table *table = &db->tables[i];
-        ovsdb_idl_condition_destroy(&table->condition);
+        ovsdb_idl_condition_move(&table->ack_cond, &null_cond);
+        ovsdb_idl_condition_move(&table->req_cond, &null_cond);
+        ovsdb_idl_condition_move(&table->new_cond, &null_cond);
@@ -692,6 +701,12 @@ ovsdb_idl_send_request(struct ovsdb_idl *idl, struct jsonrpc_msg *request)
 static void
 ovsdb_idl_restart_fsm(struct ovsdb_idl *idl)
+    /* Resync data DB table conditions to avoid missing updates due to
+     * conditions that were in flight or changed locally while the connection
+     * was down.
+     */
+    ovsdb_idl_db_sync_condition(&idl->data);
     ovsdb_idl_send_schema_request(idl, &idl->server);
     ovsdb_idl_transition(idl, IDL_S_SERVER_SCHEMA_REQUESTED);
     idl->data.monitoring = OVSDB_IDL_NOT_MONITORING;
@@ -799,7 +814,9 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg)
          * do, it's a "monitor_cond_change", which means that the conditional
          * monitor clauses were updated.
-         * If further condition changes were pending, send them now. */
+         * Mark the last requested conditions as acked and if further
+         * condition changes were pending, send them now. */
+        ovsdb_idl_db_ack_condition(&idl->data);
@@ -1495,30 +1512,60 @@ ovsdb_idl_condition_equals(const struct ovsdb_idl_condition *a,
 static void
-ovsdb_idl_condition_clone(struct ovsdb_idl_condition *dst,
+ovsdb_idl_condition_clone(struct ovsdb_idl_condition **dst,
                           const struct ovsdb_idl_condition *src)
-    ovsdb_idl_condition_init(dst);
+    if (*dst) {
+        ovsdb_idl_condition_destroy(*dst);
+    } else {
+        *dst = xmalloc(sizeof **dst);
+    }
+    ovsdb_idl_condition_init(*dst);
-    dst->is_true = src->is_true;
+    (*dst)->is_true = src->is_true;
     const struct ovsdb_idl_clause *clause;
     HMAP_FOR_EACH (clause, hmap_node, &src->clauses) {
-        ovsdb_idl_condition_add_clause__(dst, clause, clause->hmap_node.hash);
+        ovsdb_idl_condition_add_clause__(*dst, clause, clause->hmap_node.hash);
+static void
+ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst,
+                         struct ovsdb_idl_condition **src)
+    if (*dst) {
+        ovsdb_idl_condition_destroy(*dst);
+        free(*dst);
+    }
+    *dst = *src;
+    *src = NULL;
 static unsigned int
 ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db,
                            const struct ovsdb_idl_table_class *tc,
                            const struct ovsdb_idl_condition *condition)
+    struct ovsdb_idl_condition *table_cond;
     struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc);
     unsigned int seqno = db->cond_seqno;
-    if (!ovsdb_idl_condition_equals(condition, &table->condition)) {
-        ovsdb_idl_condition_destroy(&table->condition);
-        ovsdb_idl_condition_clone(&table->condition, condition);
-        db->cond_changed = table->cond_changed = true;
+    /* Compare the new condition to the last known condition which can be
+     * either "new" (not sent yet), "requested" or "acked", in this order.
+     */
+    if (table->new_cond) {
+        table_cond = table->new_cond;
+    } else if (table->req_cond) {
+        table_cond = table->req_cond;
+    } else {
+        table_cond = table->ack_cond;
+    }
+    ovs_assert(table_cond);
+    if (!ovsdb_idl_condition_equals(condition, table_cond)) {
+        ovsdb_idl_condition_clone(&table->new_cond, condition);
+        db->cond_changed = true;
         return seqno + 1;
@@ -1563,9 +1610,8 @@ ovsdb_idl_condition_to_json(const struct ovsdb_idl_condition *cnd)
 static struct json *
-ovsdb_idl_create_cond_change_req(struct ovsdb_idl_table *table)
+ovsdb_idl_create_cond_change_req(const struct ovsdb_idl_condition *cond)
-    const struct ovsdb_idl_condition *cond = &table->condition;
     struct json *monitor_cond_change_request = json_object_create();
     struct json *cond_json = ovsdb_idl_condition_to_json(cond);
@@ -1585,8 +1631,12 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
     for (size_t i = 0; i < db->class_->n_tables; i++) {
         struct ovsdb_idl_table *table = &db->tables[i];
-        if (table->cond_changed) {
-            struct json *req = ovsdb_idl_create_cond_change_req(table);
+        /* Always use the most recent conditions set by the IDL client when
+         * requesting monitor_cond_change, i.e., table->new_cond.
+         */
+        if (table->new_cond) {
+            struct json *req =
+                ovsdb_idl_create_cond_change_req(table->new_cond);
             if (req) {
                 if (!monitor_cond_change_requests) {
                     monitor_cond_change_requests = json_object_create();
@@ -1595,7 +1645,11 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
-            table->cond_changed = false;
+            /* Mark the new condition as requested by moving it to req_cond.
+             * If there's already requested condition that's a bug.
+             */
+            ovs_assert(table->req_cond == NULL);
+            ovsdb_idl_condition_move(&table->req_cond, &table->new_cond);
@@ -1610,6 +1664,73 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
     return jsonrpc_create_request("monitor_cond_change", params, NULL);
+/* Marks all requested table conditions in 'db' as acked by the server.
+ * It should be called when the server replies to monitor_cond_change
+ * requests.
+ */
+static void
+ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db)
+    for (size_t i = 0; i < db->class_->n_tables; i++) {
+        struct ovsdb_idl_table *table = &db->tables[i];
+        if (table->req_cond) {
+            ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond);
+        }
+    }
+/* Should be called when the IDL fsm is restarted and resyncs table conditions
+ * based on the state the DB is in:
+ * - if a non-zero last_id is available for the DB then upon reconnect
+ *   the IDL should first request acked conditions to avoid missing updates
+ *   about records that were added before the transaction with
+ *   txn-id == last_id. If there were requested condition changes in flight
+ *   (i.e., req_cond not NULL) and the IDL client didn't set new conditions
+ *   (i.e., new_cond is NULL) then move req_cond to new_cond to trigger a
+ *   follow up monitor_cond_change request.
+ * - if there's no last_id available for the DB then it's safe to use the
+ *   latest conditions set by the IDL client even if they weren't acked yet.
+ */
+static void
+ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db)
+    bool ack_all = uuid_is_zero(&db->last_id);
+    db->cond_changed = false;
+    for (size_t i = 0; i < db->class_->n_tables; i++) {
+        struct ovsdb_idl_table *table = &db->tables[i];
+        /* When monitor_cond_since requests will be issued, the
+         * table->ack_cond condition will be added to the "where" clause".
+         * Follow up monitor_cond_change requests will use table->new_cond.
+         */
+        if (ack_all) {
+            if (table->new_cond) {
+                ovsdb_idl_condition_move(&table->req_cond, &table->new_cond);
+            }
+            if (table->req_cond) {
+                ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond);
+            }
+        } else {
+            /* If there was no "unsent" condition but instead a
+             * monitor_cond_change request was in flight, move table->req_cond
+             * to table->new_cond and set db->cond_changed to trigger a new
+             * monitor_cond_change request.
+             *
+             * However, if a new condition has been set by the IDL client,
+             * monitor_cond_change will be sent anyway and will use the most
+             * recent table->new_cond so there's no need to update it here.
+             */
+            if (table->req_cond && !table->new_cond) {
+                ovsdb_idl_condition_move(&table->new_cond, &table->req_cond);
+                db->cond_changed = true;
+            }
+        }
+    }
 static void
 ovsdb_idl_send_cond_change(struct ovsdb_idl *idl)
@@ -2064,13 +2185,15 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db,
             monitor_request = json_object_create();
             json_object_put(monitor_request, "columns", columns);
-            const struct ovsdb_idl_condition *cond = &table->condition;
+            /* Always use acked conditions when requesting
+             * monitor_cond/monitor_cond_since.
+             */
+            const struct ovsdb_idl_condition *cond = table->ack_cond;
             if ((monitor_method == OVSDB_IDL_MM_MONITOR_COND ||
                  monitor_method == OVSDB_IDL_MM_MONITOR_COND_SINCE) &&
-                !ovsdb_idl_condition_is_true(cond)) {
+                cond && !ovsdb_idl_condition_is_true(cond)) {
                 json_object_put(monitor_request, "where",
-                table->cond_changed = false;
             json_object_put(monitor_requests, tc->name,
@@ -2078,8 +2201,6 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db,
-    db->cond_changed = false;
     struct json *params = json_array_create_3(
diff --git a/openvswitch-2.13.0/tests/ b/openvswitch-2.13.0/tests/
index cc38d69..a5ca966 100644
--- a/openvswitch-2.13.0/tests/
+++ b/openvswitch-2.13.0/tests/
@@ -1814,3 +1814,59 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY],
 OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote'])
 OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL reconnects to leader], 3, ['remote' '+remotestop' 'remote'])
+# same as OVSDB_CHECK_IDL but uses C IDL implementation with tcp
+# with multiple remotes.
+  [AT_SETUP([$1 - C - tcp])
+   AT_KEYWORDS([ovsdb server idl positive tcp socket $5])
+   m4_define([LPBK],[])
+   AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK])
+   remotes=tcp:LPBK:$TCP_PORT_1,tcp:LPBK:$TCP_PORT_2,tcp:LPBK:$TCP_PORT_3
+   m4_if([$3], [], [],
+     [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])])
+   AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl tcp:LPBK:$TCP_PORT_1 $4],
+            [0], [stdout], [ignore])
+   AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]),
+            [0], [$5])
+# Checks that monitor_cond_since works fine when disconnects happen
+# with cond_change requests in flight (i.e., IDL is properly updated).
+OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
+  3,
+  [['["idltest",
+       {"op": "insert",
+       "table": "simple",
+       "row": {"i": 1,
+               "r": 1.0,
+               "b": true}},
+       {"op": "insert",
+       "table": "simple",
+       "row": {"i": 2,
+               "r": 1.0,
+               "b": true}}]']],
+  [['condition simple []' \
+    'condition simple [["i","==",2]]' \
+    'condition simple [["i","==",1]]' \
+    '+reconnect' \
+    '["idltest",
+      {"op": "update",
+       "table": "simple",
+       "where": [["i", "==", 1]],
+       "row": {"r": 2.0 }}]']],
+  [[000: change conditions
+001: empty
+002: change conditions
+003: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
+004: change conditions
+005: reconnect
+006: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
+007: {"error":null,"result":[{"count":1}]}
+008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+009: done