From 99fdfb438da15772799718240eb50c6d4891736a Mon Sep 17 00:00:00 2001
From: Open vSwitch CI
Date: Feb 25 2022 22:40:42 +0000
Subject: Import openvswitch2.13-2.13.0-162 from Fast DataPath

---

diff --git a/SOURCES/openvswitch-2.13.0.patch b/SOURCES/openvswitch-2.13.0.patch
index 610bf45..4bf6791 100644
--- a/SOURCES/openvswitch-2.13.0.patch
+++ b/SOURCES/openvswitch-2.13.0.patch
@@ -88126,7 +88126,7 @@ index 18c83fe9c2..dd14d81091 100644
  struct hmap servers;
  struct ovsdb_error *error =
 diff --git a/ovsdb/raft.c b/ovsdb/raft.c
-index 4789bc4f22..ec4c244762 100644
+index 4789bc4f22..68aea2823b 100644
 --- a/ovsdb/raft.c
 +++ b/ovsdb/raft.c
 @@ -36,6 +36,7 @@
@@ -88137,17 +88137,18 @@ index 4789bc4f22..ec4c244762 100644
  #include "socket-util.h"
  #include "stream.h"
  #include "timeval.h"
-@@ -73,7 +74,8 @@ enum raft_failure_test {
+@@ -73,7 +74,9 @@ enum raft_failure_test {
      FT_CRASH_BEFORE_SEND_EXEC_REQ,
      FT_CRASH_AFTER_SEND_EXEC_REQ,
      FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE,
 -    FT_DELAY_ELECTION
++    FT_CRASH_BEFORE_SEND_SNAPSHOT_REP,
 +    FT_DELAY_ELECTION,
 +    FT_DONT_SEND_VOTE_REQUEST
  };
 
  static enum raft_failure_test failure_test;
-@@ -298,6 +300,17 @@ struct raft {
+@@ -298,6 +301,17 @@ struct raft {
      bool had_leader;            /* There has been leader elected since last
                                     election initiated. This is to help setting
                                     candidate_retrying. */
@@ -88165,7 +88166,27 @@ index 4789bc4f22..ec4c244762 100644
  };
 
  /* All Raft structures. */
+@@ -357,12 +371,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *);
+ static void raft_run_reconfigure(struct raft *);
+ 
+ static void raft_set_leader(struct raft *, const struct uuid *sid);
++
+ static struct raft_server *
+ raft_find_server(const struct raft *raft, const struct uuid *sid)
+ {
+     return raft_server_find(&raft->servers, sid);
+ }
+ 
++static struct raft_server *
++raft_find_new_server(struct raft *raft, const struct uuid *uuid)
++{
++    return raft_server_find(&raft->add_servers, uuid);
++}
++
+ static char *
+ raft_make_address_passive(const char *address_)
+ {
@@ -88175,7 +88196,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -405,6 +418,9 @@ raft_alloc(void)
+@@ -405,6 +426,9 @@ raft_alloc(void)
 
      raft->election_timer = ELECTION_BASE_MSEC;
 
@@ -88210,7 +88231,7 @@ index 4789bc4f22..ec4c244762 100644
      return raft;
  }
 
@@ -88220,7 +88241,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -918,6 +934,34 @@ raft_reset_ping_timer(struct raft *raft)
+@@ -918,6 +942,34 @@ raft_reset_ping_timer(struct raft *raft)
      raft->ping_timeout = time_msec() + raft->election_timer / 3;
  }
 
@@ -88244,7 +88265,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -932,6 +976,9 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
+@@ -932,6 +984,9 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
                                &conn->sid);
      conn->incoming = incoming;
      conn->js_seqno = jsonrpc_session_get_seqno(conn->js);
 
 /* Starts the local server in an existing Raft cluster, using the local copy of
@@ -88269,7 +88290,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -1007,6 +1054,23 @@ raft_get_sid(const struct raft *raft)
+@@ -1007,6 +1062,23 @@ raft_get_sid(const struct raft *raft)
      return &raft->sid;
  }
 
 /* Returns true if 'raft' has completed joining its cluster, has not left or
  * initiated leaving the cluster, does not have failed disk storage, and is
  * apparently connected to the leader in a healthy way (or is itself the
-@@ -1020,12 +1084,22 @@ raft_get_sid(const struct raft *raft)
+@@ -1020,12 +1092,22 @@ raft_get_sid(const struct raft *raft)
  bool
 raft_is_connected(const struct raft *raft)
 {
-    bool ret = (!raft->candidate_retrying
+    static bool last_state = false;
+    bool ret = (!raft->candidate_retrying
                 && !raft->joining
                 && !raft->leaving
                 && !raft->left
                 && !raft->failed
-                && raft->ever_had_leader);
+                && raft->ever_had_leader
+                && raft->is_connected);
+
+    if (last_state != ret) {
+        VLOG_DBG("raft_is_connected: %s", ret ? "true" : "false");
+    }
+    last_state = ret;
 
     return ret;
 }
 
@@ -88298,7 +88319,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -1397,8 +1471,19 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn)
+@@ -1397,8 +1479,19 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn)
      jsonrpc_session_run(conn->js);
 
      unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js);
-    bool just_connected = (new_seqno != conn->js_seqno
+    bool reconnected = new_seqno != conn->js_seqno;
+    bool just_connected = (reconnected
                             && jsonrpc_session_is_connected(conn->js));
      conn->js_seqno = new_seqno;
      if (just_connected) {
          if (raft->joining) {
-@@ -1641,6 +1726,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
+@@ -1641,6 +1734,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
      }
 
      ovs_assert(raft->role != RAFT_LEADER);
+
      raft->role = RAFT_CANDIDATE;
      /* If there was no leader elected since last election, we know we are
       * retrying now. */
-@@ -1684,7 +1770,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
+@@ -1684,7 +1778,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
              .leadership_transfer = leadership_transfer,
          },
      };
-    raft_send(raft, &rq);
+    if (failure_test != FT_DONT_SEND_VOTE_REQUEST) {
+        raft_send(raft, &rq);
+    }
  }
 
  /* Vote for ourselves. */
@@ -88309,7 +88330,47 @@ index 4789bc4f22..ec4c244762 100644
+@@ -1703,6 +1799,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid)
+ static void
+ raft_conn_close(struct raft_conn *conn)
+ {
++    VLOG_DBG("closing connection to server %s (%s)",
++             conn->nickname, jsonrpc_session_get_name(conn->js));
+     jsonrpc_session_close(conn->js);
+     ovs_list_remove(&conn->list_node);
+     free(conn->nickname);
+@@ -1793,15 +1891,29 @@ raft_run(struct raft *raft)
+     }
+ 
+     /* Close unneeded sessions. */
++    struct raft_server *server;
+     struct raft_conn *next;
+     LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) {
+         if (!raft_conn_should_stay_open(raft, conn)) {
++            server = raft_find_new_server(raft, &conn->sid);
++            if (server) {
++                /* We only have one incoming connection from joining servers,
++                 * so if it's closed, we need to destroy the record about the
++                 * server.  This way the process can be started over on the
++                 * next join request. */
++                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
++                VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) "
++                             "disconnected while joining",
++                             CID_ARGS(&raft->cid),
++                             server->nickname, server->address);
++                hmap_remove(&raft->add_servers, &server->hmap_node);
++                raft_server_destroy(server);
++            }
+             raft_conn_close(conn);
+         }
+     }
+ 
+     /* Open needed sessions. */
+-    struct raft_server *server;
+     HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
+         raft_open_conn(raft, server->address, &server->sid);
+     }
@@ -88325,7 +88386,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -2513,13 +2601,14 @@ raft_server_init_leader(struct raft *raft, struct raft_server *s)
+@@ -2513,13 +2625,14 @@ raft_server_init_leader(struct raft *raft, struct raft_server *s)
      s->match_index = 0;
      s->phase = RAFT_PHASE_STABLE;
      s->replied = false;
+    s->install_snapshot_request_in_progress = false;
  }
 
  static void
  raft_set_leader(struct raft *raft, const struct uuid *sid)
  {
      raft->leader_sid = *sid;
+    raft->ever_had_leader = true;
      raft->had_leader = true;
      raft->candidate_retrying = false;
  }
@@ -88333,7 +88394,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -2731,6 +2820,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
+@@ -2731,6 +2844,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
                        raft->election_timer, e->election_timer);
              raft->election_timer = e->election_timer;
              raft->election_timer_new = 0;
+            raft_update_probe_intervals(raft);
          }
          if (e->servers) {
              /* raft_run_reconfigure() can write a new Raft entry, which can
@@ -88341,7 +88402,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -2747,6 +2837,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
+@@ -2747,6 +2861,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
              VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64,
                        raft->election_timer, e->election_timer);
              raft->election_timer = e->election_timer;
+            raft_update_probe_intervals(raft);
          }
      }
      /* Check if any pending command can be completed, and complete it.
-@@ -2960,6 +3051,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
+@@ -2960,6 +3075,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
          };
          ignore(ovsdb_log_write_and_free(raft->log, raft_record_to_json(&r)));
      }
+    if (raft->role == RAFT_CANDIDATE) {
+        /* Section 3.4: While waiting for votes, a candidate may
+         * receive an AppendEntries RPC from another server claiming to
+         * be leader. */
+        raft->role = RAFT_FOLLOWER;
+    }
+    raft->had_leader = true;
+
      return true;
  }
 
@@ -88357,7 +88418,20 @@ index 4789bc4f22..ec4c244762 100644
+@@ -3176,12 +3300,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid)
+     return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL;
+ }
+ 
+-static struct raft_server *
+-raft_find_new_server(struct raft *raft, const struct uuid *uuid)
+-{
+-    return raft_server_find(&raft->add_servers, uuid);
+-}
+-
+ /* Figure 3.1: "If there exists an N such that N > commitIndex, a
+  * majority of matchIndex[i] >= N, and log[N].term == currentTerm, set
+  * commitIndex = N (sections 3.5 and 3.6)." */
-@@ -3260,7 +3360,20 @@ raft_send_install_snapshot_request(struct raft *raft,
+@@ -3260,7 +3378,20 @@ raft_send_install_snapshot_request(struct raft *raft,
              .election_timer = raft->election_timer, /* use latest value */
          }
      };
-    raft_send(raft, &rpc);
+
+    if (s->install_snapshot_request_in_progress) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+
+        VLOG_INFO_RL(&rl, "not sending snapshot to server %s, "
+                          "already in progress", s->nickname);
+        return;
+    }
+
+    s->install_snapshot_request_in_progress = raft_send(raft, &rpc);
  }
 
  static void
@@ -88379,7 +88453,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -3913,7 +4026,7 @@ raft_handle_install_snapshot_request__(
+@@ -3913,7 +4044,7 @@ raft_handle_install_snapshot_request__(
      struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start,
                                                     &new_snapshot);
      if (error) {
-        char *error_s = ovsdb_error_to_string(error);
+        char *error_s = ovsdb_error_to_string_free(error);
          VLOG_WARN("could not save snapshot: %s", error_s);
          free(error_s);
          return false;
@@ -88388,7 +88462,18 @@ index 4789bc4f22..ec4c244762 100644
+@@ -3942,6 +4073,10 @@ static void
+ raft_handle_install_snapshot_request(
+     struct raft *raft, const struct raft_install_snapshot_request *rq)
+ {
++    if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) {
++        ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply");
++    }
++
+     if (raft_handle_install_snapshot_request__(raft, rq)) {
+         union raft_rpc rpy = {
+             .install_snapshot_reply = {
@@ -88397,7 +88482,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -3977,6 +4090,8 @@ raft_handle_install_snapshot_reply(
+@@ -3977,6 +4112,8 @@ raft_handle_install_snapshot_reply(
          }
      }
 
+    s->install_snapshot_request_in_progress = false;
+
      if (rpy->last_index != raft->log_start - 1 ||
          rpy->last_term != raft->snap.term) {
          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
-@@ -3992,8 +4107,9 @@ raft_handle_install_snapshot_reply(
+@@ -3992,8 +4129,9 @@ raft_handle_install_snapshot_reply(
      VLOG_INFO_RL(&rl, "cluster "CID_FMT": installed snapshot on server %s "
                   " up to %"PRIu64":%"PRIu64, CID_ARGS(&raft->cid),
                   s->nickname, rpy->last_term, rpy->last_index);
-    raft_send_append_request(raft, s, 0, "snapshot installed");
+    s->next_index = raft->log_start;
+    raft_send_append_request(raft, s, raft->log_end - s->next_index,
+                             "snapshot installed");
  }
 
  /* Returns true if 'raft' has grown enough since the last snapshot that
@@ -88409,7 +88494,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -4023,9 +4139,24 @@ raft_may_snapshot(const struct raft *raft)
+@@ -4023,9 +4161,24 @@ raft_may_snapshot(const struct raft *raft)
              && !raft->leaving
              && !raft->left
              && !raft->failed
              && raft->last_applied >= raft->log_start - 1);
  }
 
 /* Replaces the log for 'raft', up to the last log entry read, by
  * 'new_snapshot_data'.  Returns NULL if successful, otherwise an error that
  * the caller must eventually free.
@@ -88434,7 +88519,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -4143,9 +4274,7 @@ raft_handle_execute_command_request__(
+@@ -4143,9 +4296,7 @@ raft_handle_execute_command_request__(
      cmd->sid = rq->common.sid;
 
      enum raft_command_status status = cmd->status;
-    if (status != RAFT_CMD_INCOMPLETE) {
-        raft_command_unref(cmd);
-    }
+    raft_command_unref(cmd);
      return status;
  }
 
@@ -88445,7 +88530,7 @@ index 4789bc4f22..ec4c244762 100644
-@@ -4366,6 +4495,8 @@ raft_unixctl_status(struct unixctl_conn *conn,
+@@ -4366,6 +4517,8 @@ raft_unixctl_status(struct unixctl_conn *conn,
              : raft->leaving ? "leaving cluster"
              : raft->left ? "left cluster"
              : raft->failed ? "failed"
"failed" @@ -88454,7 +88539,7 @@ index 4789bc4f22..ec4c244762 100644 : "cluster member"); if (raft->joining) { ds_put_format(&s, "Remotes for joining:"); -@@ -4639,6 +4770,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn, +@@ -4639,6 +4792,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn, unixctl_command_reply(conn, "change of election timer initiated."); } @@ -88497,7 +88582,16 @@ index 4789bc4f22..ec4c244762 100644 static void raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, const char *argv[], -@@ -4667,6 +4834,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, +@@ -4659,6 +4848,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, + failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ; + } else if (!strcmp(test, "crash-after-receiving-append-request-update")) { + failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE; ++ } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) { ++ failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP; + } else if (!strcmp(test, "delay-election")) { + failure_test = FT_DELAY_ELECTION; + struct raft *raft; +@@ -4667,6 +4858,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, raft_reset_election_timer(raft); } } @@ -88506,7 +88600,7 @@ index 4789bc4f22..ec4c244762 100644 } else if (!strcmp(test, "clear")) { failure_test = FT_NO_TEST; unixctl_command_reply(conn, "test dismissed"); -@@ -4697,6 +4866,9 @@ raft_init(void) +@@ -4697,6 +4890,9 @@ raft_init(void) raft_unixctl_kick, NULL); unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2, raft_unixctl_change_election_timer, NULL); @@ -90405,7 +90499,7 @@ index 8d777a0275..5e3b26aea8 100644 AT_KEYWORDS([ovsdb client positive]) diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at -index 3a0bd4579e..cf43e9cf86 100644 +index 3a0bd4579e..9114ea1d13 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -128,7 +128,7 @@ ovsdb_test_cluster_disconnect () { @@ -90459,7 +90553,7 @@ index 3a0bd4579e..cf43e9cf86 100644 AT_BANNER([OVSDB cluster election timer change]) -@@ -273,6 +308,88 @@ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Ele +@@ -273,6 +308,143 @@ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Ele AT_CLEANUP @@ -90545,10 +90639,65 @@ index 3a0bd4579e..cf43e9cf86 100644 + +AT_CLEANUP + ++AT_BANNER([OVSDB - cluster failure while joining]) ++AT_SETUP([OVSDB cluster - follower crash while joining]) ++AT_KEYWORDS([ovsdb server negative unix cluster join]) ++ ++n=3 ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++ordinal_schema > schema ++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl ++ $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) ++cid=`ovsdb-tool db-cid s1.db` ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) ++done ++ ++on_exit 'kill `cat *.pid`' ++ ++dnl Starting followers first, so we can configure them to crash on join. 
++for j in `seq $n`; do
++    i=$(($n + 1 - $j))
++    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl
++              --detach --no-chdir --log-file=s$i.log dnl
++              --pidfile=s$i.pid --unixctl=s$i dnl
++              --remote=punix:s$i.ovsdb s$i.db])
++    if test $i != 1; then
++        OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl
++            cluster/failure-test crash-before-sending-install-snapshot-reply dnl
++            | grep -q "engaged"])
++    fi
++done
++
++dnl Make sure that followers really crashed.
++for i in `seq 2 $n`; do
++    OVS_WAIT_WHILE([test -s s$i.pid])
++done
++
++dnl Bring them back.
++for i in `seq 2 $n`; do
++    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl
++              --detach --no-chdir --log-file=s$i.log dnl
++              --pidfile=s$i.pid --unixctl=s$i dnl
++              --remote=punix:s$i.ovsdb s$i.db])
++done
++
++dnl Make sure that all servers joined the cluster.
++for i in `seq $n`; do
++    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
++done
++
++for i in `seq $n`; do
++    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
++done
++
++AT_CLEANUP
++
 OVS_START_SHELL_HELPERS
-@@ -436,6 +553,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
+@@ -436,6 +608,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
  ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
  AT_CLEANUP
 
 AT_BANNER([OVSDB - cluster tests])
 
-@@ -529,7 +701,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral
+@@ -529,7 +756,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral
  # Use file instead of var because code inside "while" runs in a subshell.
  echo 0 > phase
  i=0
diff --git a/SPECS/openvswitch2.13.spec b/SPECS/openvswitch2.13.spec
index 50857cb..fcf582d 100644
--- a/SPECS/openvswitch2.13.spec
+++ b/SPECS/openvswitch2.13.spec
@@ -59,7 +59,7 @@ Summary: Open vSwitch
 Group: System Environment/Daemons daemon/database/utilities
 URL: http://www.openvswitch.org/
 Version: 2.13.0
-Release: 161%{?commit0:.%{date}git%{shortcommit0}}%{?commit1:dpdk%{shortcommit1}}%{?dist}
+Release: 162%{?commit0:.%{date}git%{shortcommit0}}%{?commit1:dpdk%{shortcommit1}}%{?dist}
 
 # Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the
 # lib/sflow*.[ch] files are SISSL
@@ -715,6 +715,12 @@ exit 0
 %endif
 
 %changelog
+* Fri Feb 25 2022 Open vSwitch CI - 2.13.0-162
+- Merging upstream branch-2.13 [RH git: 02895de52e]
+  Commit list:
+  226485590d ovsdb: raft: Fix inability to join the cluster after interrupted attempt. (#2033514)
+
+
 * Wed Feb 23 2022 Open vSwitch CI - 2.13.0-161
 - Merging upstream branch-2.13 [RH git: b782265f2e]
   Commit list: