diff --git a/SOURCES/openvswitch-2.16.0.patch b/SOURCES/openvswitch-2.16.0.patch index e3a3241..34ef187 100644 --- a/SOURCES/openvswitch-2.16.0.patch +++ b/SOURCES/openvswitch-2.16.0.patch @@ -4321,10 +4321,38 @@ index a69e37e5c2..48c6df511f 100644 /* On disk data serialization and deserialization. */ diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 2fb5156519..1a3447a8dd 100644 +index 2fb5156519..855404808c 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c -@@ -494,11 +494,11 @@ raft_create_cluster(const char *file_name, const char *name, +@@ -74,6 +74,7 @@ enum raft_failure_test { + FT_CRASH_BEFORE_SEND_EXEC_REQ, + FT_CRASH_AFTER_SEND_EXEC_REQ, + FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, ++ FT_CRASH_BEFORE_SEND_SNAPSHOT_REP, + FT_DELAY_ELECTION, + FT_DONT_SEND_VOTE_REQUEST, + FT_STOP_RAFT_RPC, +@@ -379,12 +380,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *); + static void raft_run_reconfigure(struct raft *); + + static void raft_set_leader(struct raft *, const struct uuid *sid); ++ + static struct raft_server * + raft_find_server(const struct raft *raft, const struct uuid *sid) + { + return raft_server_find(&raft->servers, sid); + } + ++static struct raft_server * ++raft_find_new_server(struct raft *raft, const struct uuid *uuid) ++{ ++ return raft_server_find(&raft->add_servers, uuid); ++} ++ + static char * + raft_make_address_passive(const char *address_) + { +@@ -494,11 +502,11 @@ raft_create_cluster(const char *file_name, const char *name, .snap_index = index++, .snap = { .term = term, @@ -4337,7 +4365,7 @@ index 2fb5156519..1a3447a8dd 100644 shash_add_nocopy(json_object(h.snap.servers), xasprintf(UUID_FMT, UUID_ARGS(&h.sid)), json_string_create(local_address)); -@@ -727,10 +727,10 @@ raft_add_entry(struct raft *raft, +@@ -727,10 +735,10 @@ raft_add_entry(struct raft *raft, uint64_t index = raft->log_end++; struct raft_entry *entry = &raft->entries[index - raft->log_start]; entry->term = term; @@ -4349,7 +4377,7 @@ index 2fb5156519..1a3447a8dd 100644 return index; } -@@ -741,13 +741,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data, +@@ -741,13 +749,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data, const struct uuid *eid, struct json *servers, uint64_t election_timer) { @@ -4369,7 +4397,48 @@ index 2fb5156519..1a3447a8dd 100644 .servers = servers, .election_timer = election_timer, .eid = eid ? *eid : UUID_ZERO, -@@ -2161,7 +2164,7 @@ raft_get_eid(const struct raft *raft, uint64_t index) +@@ -1864,6 +1875,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid) + static void + raft_conn_close(struct raft_conn *conn) + { ++ VLOG_DBG("closing connection to server %s (%s)", ++ conn->nickname, jsonrpc_session_get_name(conn->js)); + jsonrpc_session_close(conn->js); + ovs_list_remove(&conn->list_node); + free(conn->nickname); +@@ -1954,16 +1967,30 @@ raft_run(struct raft *raft) + } + + /* Close unneeded sessions. */ ++ struct raft_server *server; + struct raft_conn *next; + LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { + if (!raft_conn_should_stay_open(raft, conn)) { ++ server = raft_find_new_server(raft, &conn->sid); ++ if (server) { ++ /* We only have one incoming connection from joining servers, ++ * so if it's closed, we need to destroy the record about the ++ * server. This way the process can be started over on the ++ * next join request. */ ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); ++ VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) " ++ "disconnected while joining", ++ CID_ARGS(&raft->cid), ++ server->nickname, server->address); ++ hmap_remove(&raft->add_servers, &server->hmap_node); ++ raft_server_destroy(server); ++ } + raft->n_disconnections++; + raft_conn_close(conn); + } + } + + /* Open needed sessions. */ +- struct raft_server *server; + HMAP_FOR_EACH (server, hmap_node, &raft->servers) { + raft_open_conn(raft, server->address, &server->sid); + } +@@ -2161,7 +2188,7 @@ raft_get_eid(const struct raft *raft, uint64_t index) { for (; index >= raft->log_start; index--) { const struct raft_entry *e = raft_get_entry(raft, index); @@ -4378,7 +4447,7 @@ index 2fb5156519..1a3447a8dd 100644 return &e->eid; } } -@@ -2826,8 +2829,8 @@ raft_truncate(struct raft *raft, uint64_t new_end) +@@ -2826,8 +2853,8 @@ raft_truncate(struct raft *raft, uint64_t new_end) return servers_changed; } @@ -4389,7 +4458,7 @@ index 2fb5156519..1a3447a8dd 100644 { /* Invariant: log_start - 2 <= last_applied <= commit_index < log_end. */ ovs_assert(raft->log_start <= raft->last_applied + 2); -@@ -2839,32 +2842,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid) +@@ -2839,32 +2866,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid) } if (raft->log_start == raft->last_applied + 2) { @@ -4425,7 +4494,7 @@ index 2fb5156519..1a3447a8dd 100644 /* Updates commit index in raft log. If commit index is already up-to-date * it does nothing and return false, otherwise, returns true. */ static bool -@@ -2878,7 +2869,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) +@@ -2878,7 +2893,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) while (raft->commit_index < new_commit_index) { uint64_t index = ++raft->commit_index; const struct raft_entry *e = raft_get_entry(raft, index); @@ -4434,7 +4503,7 @@ index 2fb5156519..1a3447a8dd 100644 struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid); if (cmd) { -@@ -3059,7 +3050,9 @@ raft_handle_append_entries(struct raft *raft, +@@ -3059,7 +3074,9 @@ raft_handle_append_entries(struct raft *raft, for (; i < n_entries; i++) { const struct raft_entry *e = &entries[i]; error = raft_write_entry(raft, e->term, @@ -4445,7 +4514,7 @@ index 2fb5156519..1a3447a8dd 100644 json_nullable_clone(e->servers), e->election_timer); if (error) { -@@ -3314,20 +3307,29 @@ bool +@@ -3314,20 +3331,29 @@ bool raft_has_next_entry(const struct raft *raft_) { struct raft *raft = CONST_CAST(struct raft *, raft_); @@ -4485,7 +4554,20 @@ index 2fb5156519..1a3447a8dd 100644 } /* Returns the log index of the last-read snapshot or log entry. */ -@@ -3420,6 +3422,7 @@ raft_send_install_snapshot_request(struct raft *raft, +@@ -3352,12 +3378,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid) + return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL; + } + +-static struct raft_server * +-raft_find_new_server(struct raft *raft, const struct uuid *uuid) +-{ +- return raft_server_find(&raft->add_servers, uuid); +-} +- + /* Figure 3.1: "If there exists an N such that N > commitIndex, a + * majority of matchIndex[i] >= N, and log[N].term == currentTerm, set + * commitIndex = N (sections 3.5 and 3.6)." */ +@@ -3420,6 +3440,7 @@ raft_send_install_snapshot_request(struct raft *raft, const struct raft_server *s, const char *comment) { @@ -4493,7 +4575,7 @@ index 2fb5156519..1a3447a8dd 100644 union raft_rpc rpc = { .install_snapshot_request = { .common = { -@@ -3432,7 +3435,7 @@ raft_send_install_snapshot_request(struct raft *raft, +@@ -3432,7 +3453,7 @@ raft_send_install_snapshot_request(struct raft *raft, .last_term = raft->snap.term, .last_servers = raft->snap.servers, .last_eid = raft->snap.eid, @@ -4502,7 +4584,7 @@ index 2fb5156519..1a3447a8dd 100644 .election_timer = raft->election_timer, /* use latest value */ } }; -@@ -3980,6 +3983,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, +@@ -3980,6 +4001,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, uint64_t new_log_start, const struct raft_entry *new_snapshot) { @@ -4513,7 +4595,7 @@ index 2fb5156519..1a3447a8dd 100644 struct raft_header h = { .sid = raft->sid, .cid = raft->cid, -@@ -3998,12 +4005,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, +@@ -3998,12 +4023,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, /* Write log records. */ for (uint64_t index = new_log_start; index < raft->log_end; index++) { const struct raft_entry *e = &raft->entries[index - raft->log_start]; @@ -4528,7 +4610,7 @@ index 2fb5156519..1a3447a8dd 100644 .servers = e->servers, .election_timer = e->election_timer, .eid = e->eid, -@@ -4093,19 +4101,21 @@ raft_handle_install_snapshot_request__( +@@ -4093,19 +4119,21 @@ raft_handle_install_snapshot_request__( /* Case 3: The new snapshot starts past the end of our current log, so * discard all of our current log. */ @@ -4553,7 +4635,7 @@ index 2fb5156519..1a3447a8dd 100644 return false; } -@@ -4120,7 +4130,7 @@ raft_handle_install_snapshot_request__( +@@ -4120,7 +4148,7 @@ raft_handle_install_snapshot_request__( } raft_entry_uninit(&raft->snap); @@ -4562,7 +4644,18 @@ index 2fb5156519..1a3447a8dd 100644 raft_get_servers_from_log(raft, VLL_INFO); raft_get_election_timer_from_log(raft); -@@ -4216,7 +4226,7 @@ raft_may_snapshot(const struct raft *raft) +@@ -4132,6 +4160,10 @@ static void + raft_handle_install_snapshot_request( + struct raft *raft, const struct raft_install_snapshot_request *rq) + { ++ if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) { ++ ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply"); ++ } ++ + if (raft_handle_install_snapshot_request__(raft, rq)) { + union raft_rpc rpy = { + .install_snapshot_reply = { +@@ -4216,7 +4248,7 @@ raft_may_snapshot(const struct raft *raft) && !raft->leaving && !raft->left && !raft->failed @@ -4571,7 +4664,7 @@ index 2fb5156519..1a3447a8dd 100644 && raft->last_applied >= raft->log_start); } -@@ -4265,11 +4275,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) +@@ -4265,11 +4297,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) uint64_t new_log_start = raft->last_applied + 1; struct raft_entry new_snapshot = { .term = raft_get_term(raft, new_log_start - 1), @@ -4585,7 +4678,7 @@ index 2fb5156519..1a3447a8dd 100644 struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, &new_snapshot); if (error) { -@@ -4286,6 +4297,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) +@@ -4286,6 +4319,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) memmove(&raft->entries[0], &raft->entries[new_log_start - raft->log_start], (raft->log_end - new_log_start) * sizeof *raft->entries); raft->log_start = new_log_start; @@ -4595,6 +4688,15 @@ index 2fb5156519..1a3447a8dd 100644 return NULL; } +@@ -4926,6 +4962,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, + failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ; + } else if (!strcmp(test, "crash-after-receiving-append-request-update")) { + failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE; ++ } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) { ++ failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP; + } else if (!strcmp(test, "delay-election")) { + failure_test = FT_DELAY_ELECTION; + struct raft *raft; diff --git a/ovsdb/raft.h b/ovsdb/raft.h index 3545c41c2c..599bc0ae86 100644 --- a/ovsdb/raft.h @@ -6171,6 +6273,72 @@ index 604f15c2d1..c93cb9f16c 100644 + +OVS_VSWITCHD_STOP(["/Flow exceeded the maximum flow statistics reply size and was excluded from the response set/d"]) +AT_CLEANUP +diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at +index fc6253cfe9..07af1160fc 100644 +--- a/tests/ovsdb-cluster.at ++++ b/tests/ovsdb-cluster.at +@@ -400,6 +400,61 @@ done + + AT_CLEANUP + ++AT_BANNER([OVSDB - cluster failure while joining]) ++AT_SETUP([OVSDB cluster - follower crash while joining]) ++AT_KEYWORDS([ovsdb server negative unix cluster join]) ++ ++n=3 ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++ordinal_schema > schema ++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl ++ $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) ++cid=`ovsdb-tool db-cid s1.db` ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) ++done ++ ++on_exit 'kill `cat *.pid`' ++ ++dnl Starting followers first, so we can configure them to crash on join. ++for j in `seq $n`; do ++ i=$(($n + 1 - $j)) ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl ++ --detach --no-chdir --log-file=s$i.log dnl ++ --pidfile=s$i.pid --unixctl=s$i dnl ++ --remote=punix:s$i.ovsdb s$i.db]) ++ if test $i != 1; then ++ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl ++ cluster/failure-test crash-before-sending-install-snapshot-reply dnl ++ | grep -q "engaged"]) ++ fi ++done ++ ++dnl Make sure that followers really crashed. ++for i in `seq 2 $n`; do ++ OVS_WAIT_WHILE([test -s s$i.pid]) ++done ++ ++dnl Bring them back. ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl ++ --detach --no-chdir --log-file=s$i.log dnl ++ --pidfile=s$i.pid --unixctl=s$i dnl ++ --remote=punix:s$i.ovsdb s$i.db]) ++done ++ ++dnl Make sure that all servers joined the cluster. ++for i in `seq $n`; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++ ++for i in `seq $n`; do ++ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) ++done ++ ++AT_CLEANUP ++ + + + OVS_START_SHELL_HELPERS diff --git a/tests/ovsdb-data.at b/tests/ovsdb-data.at index 8cd2a26cb3..25c6acdac6 100644 --- a/tests/ovsdb-data.at diff --git a/SPECS/openvswitch2.16.spec b/SPECS/openvswitch2.16.spec index c12bd43..2cca28c 100644 --- a/SPECS/openvswitch2.16.spec +++ b/SPECS/openvswitch2.16.spec @@ -57,7 +57,7 @@ Summary: Open vSwitch Group: System Environment/Daemons daemon/database/utilities URL: http://www.openvswitch.org/ Version: 2.16.0 -Release: 56%{?dist} +Release: 57%{?dist} # Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the # lib/sflow*.[ch] files are SISSL @@ -699,6 +699,12 @@ exit 0 %endif %changelog +* Fri Feb 25 2022 Open vSwitch CI - 2.16.0-57 +- Merging upstream branch-2.16 [RH git: 897937f6d3] + Commit list: + 9598f0529c ovsdb: raft: Fix inability to join the cluster after interrupted attempt. (#2033514) + + * Fri Feb 25 2022 Open vSwitch CI - 2.16.0-56 - Merging upstream branch-2.16 [RH git: e4d6d108a3] Commit list: