diff --git a/SOURCES/bz2002115-1-totem-Add-cancel_hold_on_retransmit-config-option.patch b/SOURCES/bz2002115-1-totem-Add-cancel_hold_on_retransmit-config-option.patch new file mode 100644 index 0000000..d8b82fd --- /dev/null +++ b/SOURCES/bz2002115-1-totem-Add-cancel_hold_on_retransmit-config-option.patch @@ -0,0 +1,128 @@ +From cdf72925db5a81e546ca8e8d7d8291ee1fc77be4 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Wed, 11 Aug 2021 17:34:05 +0200 +Subject: [PATCH] totem: Add cancel_hold_on_retransmit config option + +Previously, existence of retransmit messages canceled holding +of token (and never allowed representative to enter token hold +state). + +This makes the token rotate at maximum speed and keeps processor +resending messages over and over again - overloading network +and reducing chance to successfully deliver the messages. + +Also there were reports of various Antivirus / IPS / IDS which slow +down delivery of packets with certain sizes (packets bigger than token) +which makes Corosync retransmit messages over and over again. + +Proposed solution is to allow representative to enter token hold +state when there are only retransmit messages. This allows network to +handle overload and/or gives Antivirus/IPS/IDS enough time to scan and +deliver packets without corosync entering "FAILED TO RECEIVE" state and +adding more load to network.
+ +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemconfig.c | 6 ++++++ + exec/totemsrp.c | 5 +++-- + include/corosync/totem/totem.h | 2 ++ + man/corosync.conf.5 | 15 ++++++++++++++- + 4 files changed, 25 insertions(+), 3 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index 57a1587a..46e09952 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -81,6 +81,7 @@ + #define MAX_MESSAGES 17 + #define MISS_COUNT_CONST 5 + #define BLOCK_UNLISTED_IPS 1 ++#define CANCEL_TOKEN_HOLD_ON_RETRANSMIT 0 + /* This constant is not used for knet */ + #define UDP_NETMTU 1500 + +@@ -144,6 +145,8 @@ static void *totem_get_param_by_name(struct totem_config *totem_config, const ch + return totem_config->knet_compression_model; + if (strcmp(param_name, "totem.block_unlisted_ips") == 0) + return &totem_config->block_unlisted_ips; ++ if (strcmp(param_name, "totem.cancel_token_hold_on_retransmit") == 0) ++ return &totem_config->cancel_token_hold_on_retransmit; + + return NULL; + } +@@ -365,6 +368,9 @@ void totem_volatile_config_read (struct totem_config *totem_config, icmap_map_t + + totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.block_unlisted_ips", deleted_key, + BLOCK_UNLISTED_IPS); ++ ++ totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.cancel_token_hold_on_retransmit", ++ deleted_key, CANCEL_TOKEN_HOLD_ON_RETRANSMIT); + } + + int totem_volatile_config_validate ( +diff --git a/exec/totemsrp.c b/exec/totemsrp.c +index 949d367b..d24b11fa 100644 +--- a/exec/totemsrp.c ++++ b/exec/totemsrp.c +@@ -3981,8 +3981,9 @@ static int message_handler_orf_token ( + transmits_allowed = fcc_calculate (instance, token); + mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed); + +- if (instance->my_token_held == 1 && +- (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) { ++ if (instance->totem_config->cancel_token_hold_on_retransmit && ++ instance->my_token_held == 1 && ++ 
(token->rtr_list_entries > 0 || mcasted_retransmit > 0)) { + instance->my_token_held = 0; + forward_token = 1; + } +diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h +index 8b166566..bdb6a15f 100644 +--- a/include/corosync/totem/totem.h ++++ b/include/corosync/totem/totem.h +@@ -244,6 +244,8 @@ struct totem_config { + + unsigned int block_unlisted_ips; + ++ unsigned int cancel_token_hold_on_retransmit; ++ + void (*totem_memb_ring_id_create_or_load) ( + struct memb_ring_id *memb_ring_id, + unsigned int nodeid); +diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 +index 0588ad1e..a3771ea7 100644 +--- a/man/corosync.conf.5 ++++ b/man/corosync.conf.5 +@@ -32,7 +32,7 @@ + .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + .\" * THE POSSIBILITY OF SUCH DAMAGE. + .\" */ +-.TH COROSYNC_CONF 5 2021-07-23 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" ++.TH COROSYNC_CONF 5 2021-08-11 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" + .SH NAME + corosync.conf - corosync executive configuration file + +@@ -584,6 +584,19 @@ with an old configuration. + + The default value is yes. + ++.TP ++cancel_token_hold_on_retransmit ++Allows Corosync to hold token by representative when there are too many ++retransmit messages. This allows network to process increased load without ++overloading it. Used mechanism is same as described for ++.B hold ++directive. ++ ++Some deployments may prefer to never hold token when there are ++retransmit messages. If so, option should be set to yes. ++ ++The default value is no.
++ + .PP + Within the + .B logging +-- +2.27.0 + diff --git a/SOURCES/bz2024658-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch b/SOURCES/bz2024658-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch new file mode 100644 index 0000000..39f1e01 --- /dev/null +++ b/SOURCES/bz2024658-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch @@ -0,0 +1,109 @@ +From e7a82370a7b5d3ca342d5e42e25763fa2c938739 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Tue, 26 Oct 2021 18:17:59 +0200 +Subject: [PATCH] totemsrp: Switch totempg buffers at the right time + +Commit 92e0f9c7bb9b4b6a0da8d64bdf3b2e47ae55b1cc added switching of +totempg buffers in sync phase. But because buffers got switched too early +there was a problem when delivering recovered messages (messages got +corrupted and/or lost). Solution is to switch buffers after recovered +messages got delivered. + +I think it is worth describing the complete history with reproducers so it +doesn't get lost. + +It all started with 402638929e5045ef520a7339696c687fbed0b31b (more info +about original problem is described in +https://bugzilla.redhat.com/show_bug.cgi?id=820821). This patch +solves a problem which can be reproduced with the following reproducer: +- 2 nodes +- Both nodes running corosync and testcpg +- Pause node 1 (SIGSTOP of corosync) +- On node 1, send some messages by testcpg + (it's not answering but this doesn't matter; simply hitting the ENTER key + a few times is enough) +- Wait till node 2 detects that node 1 left +- Unpause node 1 (SIGCONT of corosync) + +and on node 1 newly mcasted cpg messages got sent before sync barrier, +so node 2 logs "Unknown node -> we will not deliver message". + +Solution was to add switch of totemsrp new messages buffer. + +This patch was not enough so new one +(92e0f9c7bb9b4b6a0da8d64bdf3b2e47ae55b1cc) was created. Reproducer of +problem was similar, just cpgverify was used instead of testcpg.
+Occasionally when node 1 was unpaused it hung in sync phase because +there was a partial message in totempg buffers. New sync message had +different frag cont so it was thrown away and never delivered. + +After many years a problem was found which is solved by this patch +(original issue described in +https://github.com/corosync/corosync/issues/660). +Reproducer is more complex: +- 2 nodes +- Node 1 is rate-limited (used script on the hypervisor side): + ``` + iface=tapXXXX + # ~0.1MB/s in bit/s + rate=838856 + # 1mb/s + burst=1048576 + tc qdisc add dev $iface root handle 1: htb default 1 + tc class add dev $iface parent 1: classid 1:1 htb rate ${rate}bps \ + burst ${burst}b + tc qdisc add dev $iface handle ffff: ingress + tc filter add dev $iface parent ffff: prio 50 basic police rate \ + ${rate}bps burst ${burst}b mtu 64kb "drop" + ``` +- Node 2 is running corosync and cpgverify +- Node 1 keeps restarting corosync and running cpgverify in a cycle + - Console 1: while true; do corosync; sleep 20; \ + kill $(pidof corosync); sleep 20; done + - Console 2: while true; do ./cpgverify;done + +And from time to time (reproduced usually in less than 5 minutes) +cpgverify reports corrupted message. + +Signed-off-by: Jan Friesse +Reviewed-by: Fabio M. Di Nitto +--- + exec/totemsrp.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/exec/totemsrp.c b/exec/totemsrp.c +index d24b11fa..fd71771b 100644 +--- a/exec/totemsrp.c ++++ b/exec/totemsrp.c +@@ -1989,13 +1989,27 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + trans_memb_list_totemip, instance->my_trans_memb_entries, + left_list, instance->my_left_memb_entries, + 0, 0, &instance->my_ring_id); ++ /* ++ * Switch new totemsrp messages queue. Messages sent from now on are stored ++ * in different queue so synchronization messages are delivered first. Totempg ++ * buffers will be switched later.
++ */ + instance->waiting_trans_ack = 1; +- instance->totemsrp_waiting_trans_ack_cb_fn (1); + + // TODO we need to filter to ensure we only deliver those + // messages which are part of instance->my_deliver_memb + messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received); + ++ /* ++ * Switch totempg buffers. This used to be right after ++ * instance->waiting_trans_ack = 1; ++ * line. This was causing problem, because there may be not yet ++ * processed parts of messages in totempg buffers. ++ * So when buffers were switched and recovered messages ++ * got delivered it was not possible to assemble them. ++ */ ++ instance->totemsrp_waiting_trans_ack_cb_fn (1); ++ + instance->my_aru = aru_save; + + /* +-- +2.27.0 + diff --git a/SPECS/corosync.spec b/SPECS/corosync.spec index eb525b0..9fbb696 100644 --- a/SPECS/corosync.spec +++ b/SPECS/corosync.spec @@ -23,11 +23,14 @@ Name: corosync Summary: The Corosync Cluster Engine and Application Programming Interfaces Version: 3.1.5 -Release: 1%{?gitver}%{?dist} +Release: 2%{?gitver}%{?dist} License: BSD URL: http://corosync.github.io/corosync/ Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz +Patch0: bz2002115-1-totem-Add-cancel_hold_on_retransmit-config-option.patch +Patch1: bz2024658-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch + %if %{with spausedd} Source1: https://github.com/jfriesse/spausedd/releases/download/%{spausedd_version}/spausedd-%{spausedd_version}.tar.gz # VMGuestLib exists only for x86_64 architecture @@ -91,6 +94,9 @@ BuildRequires: pkgconfig(vmguestlib) %setup -q -n %{name}-%{version}%{?gittarver} %endif +%patch0 -p1 -b .bz2002115-1 +%patch1 -p1 -b .bz2024658-1 + %build %if %{with runautogen} ./autogen.sh @@ -389,6 +395,15 @@ fi %endif %changelog +* Wed Nov 24 2021 Jan Friesse 3.1.5-2 +- Resolves: rhbz#2002115 +- Resolves: rhbz#2024658 + +- totem: Add cancel_hold_on_retransmit config option (rhbz#2002115) +- merge upstream 
commit cdf72925db5a81e546ca8e8d7d8291ee1fc77be4 (rhbz#2002115) +- totemsrp: Switch totempg buffers at the right time (rhbz#2024658) +- merge upstream commit e7a82370a7b5d3ca342d5e42e25763fa2c938739 (rhbz#2024658) + * Wed Aug 04 2021 Jan Friesse 3.1.5-1 - Related: rhbz#1948973