Blame SOURCES/bz2002115-1-totem-Add-cancel_hold_on_retransmit-config-option.patch

f27531
From cdf72925db5a81e546ca8e8d7d8291ee1fc77be4 Mon Sep 17 00:00:00 2001
f27531
From: Jan Friesse <jfriesse@redhat.com>
f27531
Date: Wed, 11 Aug 2021 17:34:05 +0200
f27531
Subject: [PATCH] totem: Add cancel_hold_on_retransmit config option
f27531
f27531
Previously, existence of retransmit messages canceled holding
f27531
of token (and never allowed representative to enter token hold
f27531
state).
f27531
f27531
This makes token rotating maximum speed and keeps processor
f27531
resending messages over and over again - overloading network
f27531
and reducing chance to successfully deliver the messages.
f27531
f27531
Also there were reports of various Antivirus / IPS / IDS which slow
f27531
down delivery of packets with certain sizes (packets bigger than token)
f27531
which makes Corosync retransmit messages over and over again.
f27531
f27531
Proposed solution is to allow representative to enter token hold
f27531
state when there are only retransmit messages. This allows network to
f27531
handle overload and/or gives Antivirus/IPS/IDS enough time to scan and
f27531
deliver packets without corosync entering "FAILED TO RECEIVE" state and
f27531
adding more load to network.
f27531
f27531
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
f27531
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
f27531
---
f27531
 exec/totemconfig.c             |  6 ++++++
f27531
 exec/totemsrp.c                |  5 +++--
f27531
 include/corosync/totem/totem.h |  2 ++
f27531
 man/corosync.conf.5            | 15 ++++++++++++++-
f27531
 4 files changed, 25 insertions(+), 3 deletions(-)
f27531
f27531
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
f27531
index 57a1587a..46e09952 100644
f27531
--- a/exec/totemconfig.c
f27531
+++ b/exec/totemconfig.c
f27531
@@ -81,6 +81,7 @@
f27531
 #define MAX_MESSAGES				17
f27531
 #define MISS_COUNT_CONST			5
f27531
 #define BLOCK_UNLISTED_IPS			1
f27531
+#define CANCEL_TOKEN_HOLD_ON_RETRANSMIT		0
f27531
 /* This constant is not used for knet */
f27531
 #define UDP_NETMTU                              1500
f27531
 
f27531
@@ -144,6 +145,8 @@ static void *totem_get_param_by_name(struct totem_config *totem_config, const ch
f27531
 		return totem_config->knet_compression_model;
f27531
 	if (strcmp(param_name, "totem.block_unlisted_ips") == 0)
f27531
 		return &totem_config->block_unlisted_ips;
f27531
+	if (strcmp(param_name, "totem.cancel_token_hold_on_retransmit") == 0)
f27531
+		return &totem_config->cancel_token_hold_on_retransmit;
f27531
 
f27531
 	return NULL;
f27531
 }
f27531
@@ -365,6 +368,9 @@ void totem_volatile_config_read (struct totem_config *totem_config, icmap_map_t
f27531
 
f27531
 	totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.block_unlisted_ips", deleted_key,
f27531
 	    BLOCK_UNLISTED_IPS);
f27531
+
f27531
+	totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.cancel_token_hold_on_retransmit",
f27531
+	    deleted_key, CANCEL_TOKEN_HOLD_ON_RETRANSMIT);
f27531
 }
f27531
 
f27531
 int totem_volatile_config_validate (
f27531
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
f27531
index 949d367b..d24b11fa 100644
f27531
--- a/exec/totemsrp.c
f27531
+++ b/exec/totemsrp.c
f27531
@@ -3981,8 +3981,9 @@ static int message_handler_orf_token (
f27531
 		transmits_allowed = fcc_calculate (instance, token);
f27531
 		mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed);
f27531
 
f27531
-		if (instance->my_token_held == 1 &&
f27531
-			(token->rtr_list_entries > 0 || mcasted_retransmit > 0)) {
f27531
+		if (instance->totem_config->cancel_token_hold_on_retransmit &&
f27531
+		    instance->my_token_held == 1 &&
f27531
+		    (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) {
f27531
 			instance->my_token_held = 0;
f27531
 			forward_token = 1;
f27531
 		}
f27531
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
f27531
index 8b166566..bdb6a15f 100644
f27531
--- a/include/corosync/totem/totem.h
f27531
+++ b/include/corosync/totem/totem.h
f27531
@@ -244,6 +244,8 @@ struct totem_config {
f27531
 
f27531
 	unsigned int block_unlisted_ips;
f27531
 
f27531
+	unsigned int cancel_token_hold_on_retransmit;
f27531
+
f27531
 	void (*totem_memb_ring_id_create_or_load) (
f27531
 	    struct memb_ring_id *memb_ring_id,
f27531
 	    unsigned int nodeid);
f27531
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
f27531
index 0588ad1e..a3771ea7 100644
f27531
--- a/man/corosync.conf.5
f27531
+++ b/man/corosync.conf.5
f27531
@@ -32,7 +32,7 @@
f27531
 .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
f27531
 .\" * THE POSSIBILITY OF SUCH DAMAGE.
f27531
 .\" */
f27531
-.TH COROSYNC_CONF 5 2021-07-23 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
f27531
+.TH COROSYNC_CONF 5 2021-08-11 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
f27531
 .SH NAME
f27531
 corosync.conf - corosync executive configuration file
f27531
 
f27531
@@ -584,6 +584,19 @@ with an old configuration.
f27531
 
f27531
 The default value is yes.
f27531
 
f27531
+.TP
f27531
+cancel_token_hold_on_retransmit
f27531
+Allows Corosync to hold token by representative when there are too many
f27531
+retransmit messages. This allows network to process increased load without
f27531
+overloading it. The mechanism used is the same as described for
f27531
+.B hold
f27531
+directive.
f27531
+
f27531
+Some deployments may prefer to never hold token when there are
f27531
+retransmit messages. If so, option should be set to yes.
f27531
+
f27531
+The default value is no.
f27531
+
f27531
 .PP
f27531
 Within the
f27531
 .B logging
f27531
-- 
f27531
2.27.0
f27531