Blame SOURCES/bz1736872-fix-mtu-calculation.patch

cf5375
commit b67c63101246b400c7512cb1adbc590ac06cb6ee
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Tue Jul 30 11:18:33 2019 +0200
cf5375
cf5375
    [crypto] fix log information
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/crypto.c b/libknet/crypto.c
cf5375
index 9f05fba..9d6757b 100644
cf5375
--- a/libknet/crypto.c
cf5375
+++ b/libknet/crypto.c
cf5375
@@ -151,8 +151,6 @@ int crypto_init(
cf5375
 		goto out;
cf5375
 	}
cf5375
 
cf5375
-	log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
cf5375
-
cf5375
 out:
cf5375
 	if (!err) {
cf5375
 		knet_h->crypto_instance = new;
cf5375
@@ -161,6 +159,8 @@ out:
cf5375
 		knet_h->sec_hash_size = new->sec_hash_size;
cf5375
 		knet_h->sec_salt_size = new->sec_salt_size;
cf5375
 
cf5375
+		log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
cf5375
+
cf5375
 		if (current) {
cf5375
 			if (crypto_modules_cmds[current->model].ops->fini != NULL) {
cf5375
 				crypto_modules_cmds[current->model].ops->fini(knet_h, current);
cf5375
commit a89c2cd6d3863abe0f3ae0165239177a7461ee5e
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Wed Jul 31 14:15:07 2019 +0200
cf5375
cf5375
    [udp] log information about detected kernel MTU
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c
cf5375
index 53d2ba0..be990bb 100644
cf5375
--- a/libknet/transport_udp.c
cf5375
+++ b/libknet/transport_udp.c
cf5375
@@ -337,6 +337,7 @@ static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
cf5375
 									break;
cf5375
 								} else {
cf5375
 									knet_h->kernel_mtu = sock_err->ee_info;
cf5375
+									log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu);
cf5375
 									pthread_mutex_unlock(&knet_h->kmtu_mutex);
cf5375
 								}
cf5375
 
cf5375
commit 650ef6d26e83dd7827b2e913c52a1fac67ea60d4
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Fri Aug 2 10:43:09 2019 +0200
cf5375
cf5375
    [docs] add knet packet layout
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index 603f595..2cd48f9 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -91,6 +91,28 @@ restart:
cf5375
 		failsafe++;
cf5375
 	}
cf5375
 
cf5375
+	/*
cf5375
+	 * unencrypted packet looks like:
cf5375
+	 *
cf5375
+	 * | ip | protocol | knet_header | unencrypted data                                  |
cf5375
+	 * | onwire_len                                                                      |
cf5375
+	 * | overhead_len  |
cf5375
+	 *                 | data_len                                                        |
cf5375
+	 *                               | app MTU                                           |
cf5375
+	 *
cf5375
+	 * encrypted packet looks like (not to scale):
cf5375
+	 *
cf5375
+	 * | ip | protocol | salt | crypto(knet_header | data)      | crypto_data_pad | hash |
cf5375
+	 * | onwire_len                                                                      |
cf5375
+	 * | overhead_len  |
cf5375
+	 *                 | data_len                                                        |
cf5375
+	 *                                             | app MTU    |
cf5375
+	 *
cf5375
+	 * knet_h->sec_block_size is >= 0 if encryption will pad the data
cf5375
+	 * knet_h->sec_salt_size is >= 0 if encryption is enabled
cf5375
+	 * knet_h->sec_hash_size is >= 0 if signing is enabled
cf5375
+	 */
cf5375
+
cf5375
 	data_len = onwire_len - overhead_len;
cf5375
 
cf5375
 	if (knet_h->crypto_instance) {
cf5375
commit dbed772f0cb9070826eac6524646bd2ea7cce8c0
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Fri Aug 2 10:44:23 2019 +0200
cf5375
cf5375
    [PMTUd] fix MTU calculation when using crypto and add docs
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index 2cd48f9..1a19806 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -113,29 +113,68 @@ restart:
cf5375
 	 * knet_h->sec_hash_size is >= 0 if signing is enabled
cf5375
 	 */
cf5375
 
cf5375
+	/*
cf5375
+	 * common to all packets
cf5375
+	 */
cf5375
 	data_len = onwire_len - overhead_len;
cf5375
 
cf5375
 	if (knet_h->crypto_instance) {
cf5375
 
cf5375
+realign:
cf5375
 		if (knet_h->sec_block_size) {
cf5375
+
cf5375
+			/*
cf5375
+			 * drop both salt and hash, that leaves only the crypto data and padding
cf5375
+			 * we need to calculate the padding based on the real encrypted data.
cf5375
+			 */
cf5375
+			data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
+
cf5375
+			/*
cf5375
+			 * if the crypto mechanism requires padding, calculate the padding
cf5375
+			 * and add it back to data_len because that's what the crypto layer
cf5375
+			 * would do.
cf5375
+			 */
cf5375
 			pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size);
cf5375
+
cf5375
+			/*
cf5375
+			 * if are at the boundary, reset padding
cf5375
+			 */
cf5375
 			if (pad_len == knet_h->sec_block_size) {
cf5375
 				pad_len = 0;
cf5375
 			}
cf5375
 			data_len = data_len + pad_len;
cf5375
-		}
cf5375
 
cf5375
-		data_len = data_len + (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size);
cf5375
-
cf5375
-		if (knet_h->sec_block_size) {
cf5375
+			/*
cf5375
+			 * if our current data_len is higher than max_mtu_len
cf5375
+			 * then we need to reduce by padding size (that is our
cf5375
+			 * increment / decrement value)
cf5375
+			 *
cf5375
+			 * this generally happens only on the first PMTUd run
cf5375
+			 */
cf5375
 			while (data_len + overhead_len >= max_mtu_len) {
cf5375
 				data_len = data_len - knet_h->sec_block_size;
cf5375
 			}
cf5375
+
cf5375
+			/*
cf5375
+			 * add both hash and salt size back, similar to padding above,
cf5375
+			 * the crypto layer will add them to the data_len
cf5375
+			 */
cf5375
+			data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
 		}
cf5375
 
cf5375
 		if (dst_link->last_bad_mtu) {
cf5375
-			while (data_len + overhead_len >= dst_link->last_bad_mtu) {
cf5375
-				data_len = data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size);
cf5375
+			if (data_len + overhead_len >= dst_link->last_bad_mtu) {
cf5375
+				/*
cf5375
+				 * reduce data_len to something lower than last_bad_mtu, overhead_len
cf5375
+				 * and sec_block_size (decrementing step) - 1 (granularity)
cf5375
+				 */
cf5375
+				data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1;
cf5375
+				if (knet_h->sec_block_size) {
cf5375
+					/*
cf5375
+					 * make sure that data_len is aligned to the sec_block_size boundary
cf5375
+					 */
cf5375
+					goto realign;
cf5375
+				}
cf5375
 			}
cf5375
 		}
cf5375
 
cf5375
@@ -144,6 +183,10 @@ restart:
cf5375
 			return -1;
cf5375
 		}
cf5375
 
cf5375
+		/*
cf5375
+		 * recalculate onwire_len based on crypto information
cf5375
+		 * and place it in the PMTUd packet info
cf5375
+		 */
cf5375
 		onwire_len = data_len + overhead_len;
cf5375
 		knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
cf5375
 
cf5375
commit a9460c72fafe452b7cb584598aa43a87b44428f0
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Mon Aug 12 16:52:59 2019 +0200
cf5375
cf5375
    [PMTUd] rework the whole math to calculate MTU
cf5375
    
cf5375
    internal changes:
cf5375
    - drop the concept of sec_header_size that was completely wrong
cf5375
      and unnecessary
cf5375
    - bump crypto API to version 3 due to the above change
cf5375
    - clarify the difference between link->proto_overhead and
cf5375
      link->status->proto_overhead. We cannot rename the status
cf5375
      one as it would also change ABI.
cf5375
    - add onwire.c with documentation on the packet format
cf5375
      and what various len(s) mean in context.
cf5375
    - add 3 new functions to calculate MTUs back and forth
cf5375
      and use them around, hopefully with enough clarification
cf5375
      on why things are done in a given way.
cf5375
    - heavily change threads_pmtud.c to use those new facilities.
cf5375
    - fix major calculation issues when using crypto (non-crypto
cf5375
      was not affected by the problem).
cf5375
    - fix checks around to make sure they match the new math.
cf5375
    - fix padding calculation.
cf5375
    - add functional PMTUd crypto test
cf5375
      this test can take several hours (12+) and should be executed
cf5375
      on a controlled environment since it automatically changes
cf5375
      loopback MTU to run tests.
cf5375
    - fix the way the lowest MTU is calculated during a PMTUd run
cf5375
      to avoid spurious double notifications.
cf5375
    - drop redundant checks.
cf5375
    
cf5375
    user visible changes:
cf5375
    - Global MTU is now calculated properly when using crypto
cf5375
      and values will be in general bigger than before due
cf5375
      to incorrect padding calculation in the previous implementation.
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/Makefile.am b/libknet/Makefile.am
cf5375
index d080732..2fa2416 100644
cf5375
--- a/libknet/Makefile.am
cf5375
+++ b/libknet/Makefile.am
cf5375
@@ -36,6 +36,7 @@ sources			= \
cf5375
 			  links_acl_loopback.c \
cf5375
 			  logging.c \
cf5375
 			  netutils.c \
cf5375
+			  onwire.c \
cf5375
 			  threads_common.c \
cf5375
 			  threads_dsthandler.c \
cf5375
 			  threads_heartbeat.c \
cf5375
diff --git a/libknet/crypto.c b/libknet/crypto.c
cf5375
index 9d6757b..afa4f88 100644
cf5375
--- a/libknet/crypto.c
cf5375
+++ b/libknet/crypto.c
cf5375
@@ -154,12 +154,14 @@ int crypto_init(
cf5375
 out:
cf5375
 	if (!err) {
cf5375
 		knet_h->crypto_instance = new;
cf5375
-		knet_h->sec_header_size = new->sec_header_size;
cf5375
 		knet_h->sec_block_size = new->sec_block_size;
cf5375
 		knet_h->sec_hash_size = new->sec_hash_size;
cf5375
 		knet_h->sec_salt_size = new->sec_salt_size;
cf5375
 
cf5375
-		log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
cf5375
+		log_debug(knet_h, KNET_SUB_CRYPTO, "Hash size: %zu salt size: %zu block size: %zu",
cf5375
+			  knet_h->sec_hash_size,
cf5375
+			  knet_h->sec_salt_size,
cf5375
+			  knet_h->sec_block_size);
cf5375
 
cf5375
 		if (current) {
cf5375
 			if (crypto_modules_cmds[current->model].ops->fini != NULL) {
cf5375
@@ -195,7 +197,6 @@ void crypto_fini(
cf5375
 			crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini(knet_h, knet_h->crypto_instance);
cf5375
 		}
cf5375
 		free(knet_h->crypto_instance);
cf5375
-		knet_h->sec_header_size = 0;
cf5375
 		knet_h->sec_block_size = 0;
cf5375
 		knet_h->sec_hash_size = 0;
cf5375
 		knet_h->sec_salt_size = 0;
cf5375
diff --git a/libknet/crypto_model.h b/libknet/crypto_model.h
cf5375
index 70f6238..b05e49c 100644
cf5375
--- a/libknet/crypto_model.h
cf5375
+++ b/libknet/crypto_model.h
cf5375
@@ -14,13 +14,12 @@
cf5375
 struct crypto_instance {
cf5375
 	int	model;
cf5375
 	void	*model_instance;
cf5375
-	size_t	sec_header_size;
cf5375
 	size_t	sec_block_size;
cf5375
 	size_t	sec_hash_size;
cf5375
 	size_t	sec_salt_size;
cf5375
 };
cf5375
 
cf5375
-#define KNET_CRYPTO_MODEL_ABI 2
cf5375
+#define KNET_CRYPTO_MODEL_ABI 3
cf5375
 
cf5375
 /*
cf5375
  * see compress_model.h for explanation of the various lib related functions
cf5375
diff --git a/libknet/crypto_nss.c b/libknet/crypto_nss.c
cf5375
index 330b40c..c624a47 100644
cf5375
--- a/libknet/crypto_nss.c
cf5375
+++ b/libknet/crypto_nss.c
cf5375
@@ -801,10 +801,7 @@ static int nsscrypto_init(
cf5375
 		goto out_err;
cf5375
 	}
cf5375
 
cf5375
-	crypto_instance->sec_header_size = 0;
cf5375
-
cf5375
 	if (nsscrypto_instance->crypto_hash_type > 0) {
cf5375
-		crypto_instance->sec_header_size += nsshash_len[nsscrypto_instance->crypto_hash_type];
cf5375
 		crypto_instance->sec_hash_size = nsshash_len[nsscrypto_instance->crypto_hash_type];
cf5375
 	}
cf5375
 
cf5375
@@ -821,8 +818,6 @@ static int nsscrypto_init(
cf5375
 			}
cf5375
 		}
cf5375
 
cf5375
-		crypto_instance->sec_header_size += (block_size * 2);
cf5375
-		crypto_instance->sec_header_size += SALT_SIZE;
cf5375
 		crypto_instance->sec_salt_size = SALT_SIZE;
cf5375
 		crypto_instance->sec_block_size = block_size;
cf5375
 	}
cf5375
diff --git a/libknet/crypto_openssl.c b/libknet/crypto_openssl.c
cf5375
index 0cbc6f5..6571498 100644
cf5375
--- a/libknet/crypto_openssl.c
cf5375
+++ b/libknet/crypto_openssl.c
cf5375
@@ -566,11 +566,8 @@ static int opensslcrypto_init(
cf5375
 	memmove(opensslcrypto_instance->private_key, knet_handle_crypto_cfg->private_key, knet_handle_crypto_cfg->private_key_len);
cf5375
 	opensslcrypto_instance->private_key_len = knet_handle_crypto_cfg->private_key_len;
cf5375
 
cf5375
-	crypto_instance->sec_header_size = 0;
cf5375
-
cf5375
 	if (opensslcrypto_instance->crypto_hash_type) {
cf5375
 		crypto_instance->sec_hash_size = EVP_MD_size(opensslcrypto_instance->crypto_hash_type);
cf5375
-		crypto_instance->sec_header_size += crypto_instance->sec_hash_size;
cf5375
 	}
cf5375
 
cf5375
 	if (opensslcrypto_instance->crypto_cipher_type) {
cf5375
@@ -578,8 +575,6 @@ static int opensslcrypto_init(
cf5375
 
cf5375
 		block_size = EVP_CIPHER_block_size(opensslcrypto_instance->crypto_cipher_type);
cf5375
 
cf5375
-		crypto_instance->sec_header_size += (block_size * 2);
cf5375
-		crypto_instance->sec_header_size += SALT_SIZE;
cf5375
 		crypto_instance->sec_salt_size = SALT_SIZE;
cf5375
 		crypto_instance->sec_block_size = block_size;
cf5375
 	}
cf5375
diff --git a/libknet/internals.h b/libknet/internals.h
cf5375
index 3f105a1..31840e4 100644
cf5375
--- a/libknet/internals.h
cf5375
+++ b/libknet/internals.h
cf5375
@@ -71,7 +71,9 @@ struct knet_link {
cf5375
 	uint8_t received_pong;
cf5375
 	struct timespec ping_last;
cf5375
 	/* used by PMTUD thread as temp per-link variables and should always contain the onwire_len value! */
cf5375
-	uint32_t proto_overhead;
cf5375
+	uint32_t proto_overhead;		/* IP + UDP/SCTP overhead. NOT to be confused
cf5375
+						   with stats.proto_overhead that includes also knet headers
cf5375
+						   and crypto headers */
cf5375
 	struct timespec pmtud_last;
cf5375
 	uint32_t last_ping_size;
cf5375
 	uint32_t last_good_mtu;
cf5375
@@ -197,7 +199,6 @@ struct knet_handle {
cf5375
 	int pmtud_forcerun;
cf5375
 	int pmtud_abort;
cf5375
 	struct crypto_instance *crypto_instance;
cf5375
-	size_t sec_header_size;
cf5375
 	size_t sec_block_size;
cf5375
 	size_t sec_hash_size;
cf5375
 	size_t sec_salt_size;
cf5375
diff --git a/libknet/links.c b/libknet/links.c
cf5375
index 51ead5a..03e0af9 100644
cf5375
--- a/libknet/links.c
cf5375
+++ b/libknet/links.c
cf5375
@@ -265,7 +265,32 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
cf5375
 		host->status.reachable = 1;
cf5375
 		link->status.mtu = KNET_PMTUD_SIZE_V6;
cf5375
 	} else {
cf5375
-		link->status.mtu =  KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
cf5375
+		/*
cf5375
+		 * calculate the minimum MTU that is safe to use,
cf5375
+		 * based on RFCs and that each network device should
cf5375
+		 * be able to support without any troubles
cf5375
+		 */
cf5375
+		if (link->dynamic == KNET_LINK_STATIC) {
cf5375
+			/*
cf5375
+			 * with static link we can be more precise than using
cf5375
+			 * the generic calc_min_mtu()
cf5375
+			 */
cf5375
+			switch (link->dst_addr.ss_family) {
cf5375
+				case AF_INET6:
cf5375
+					link->status.mtu =  calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead));
cf5375
+					break;
cf5375
+				case AF_INET:
cf5375
+					link->status.mtu =  calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead));
cf5375
+					break;
cf5375
+			}
cf5375
+		} else {
cf5375
+			/*
cf5375
+			 * for dynamic links we start with the minimum MTU
cf5375
+			 * possible and PMTUd will kick in immediately
cf5375
+			 * after connection status is 1
cf5375
+			 */
cf5375
+			link->status.mtu =  calc_min_mtu(knet_h);
cf5375
+		}
cf5375
 		link->has_valid_mtu = 1;
cf5375
 	}
cf5375
 
cf5375
diff --git a/libknet/onwire.c b/libknet/onwire.c
cf5375
new file mode 100644
cf5375
index 0000000..143ac4b
cf5375
--- /dev/null
cf5375
+++ b/libknet/onwire.c
cf5375
@@ -0,0 +1,127 @@
cf5375
+/*
cf5375
+ * Copyright (C) 2019 Red Hat, Inc.  All rights reserved.
cf5375
+ *
cf5375
+ * Author: Fabio M. Di Nitto <fabbione@kronosnet.org>
cf5375
+ *
cf5375
+ * This software licensed under LGPL-2.0+
cf5375
+ */
cf5375
+
cf5375
+#include "config.h"
cf5375
+
cf5375
+#include <sys/errno.h>
cf5375
+#include <stdlib.h>
cf5375
+#include <string.h>
cf5375
+
cf5375
+#include "crypto.h"
cf5375
+#include "internals.h"
cf5375
+#include "logging.h"
cf5375
+#include "common.h"
cf5375
+#include "transport_udp.h"
cf5375
+#include "transport_sctp.h"
cf5375
+
cf5375
+/*
cf5375
+ * unencrypted packet looks like:
cf5375
+ *
cf5375
+ * | ip | protocol  | knet_header | unencrypted data                                  |
cf5375
+ * | onwire_len                                                                       |
cf5375
+ * | proto_overhead |
cf5375
+ *                  | data_len                                                        |
cf5375
+ *                                | app MTU                                           |
cf5375
+ *
cf5375
+ * encrypted packet looks like (not to scale):
cf5375
+ *
cf5375
+ * | ip | protocol  | salt | crypto(knet_header | data)      | crypto_data_pad | hash |
cf5375
+ * | onwire_len                                                                       |
cf5375
+ * | proto_overhead |
cf5375
+ *                  | data_len                                                        |
cf5375
+ *                                              | app MTU    |
cf5375
+ *
cf5375
+ * knet_h->sec_block_size is > 0 if encryption will pad the data
cf5375
+ * knet_h->sec_salt_size is > 0 if encryption is enabled
cf5375
+ * knet_h->sec_hash_size is > 0 if signing is enabled
cf5375
+ */
cf5375
+
cf5375
+/*
cf5375
+ * this function takes in the data that we would like to send
cf5375
+ * and tells us the outgoing onwire data size with crypto and
cf5375
+ * all the headers adjustment.
cf5375
+ * calling thread needs to account for protocol overhead.
cf5375
+ */
cf5375
+
cf5375
+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen)
cf5375
+{
cf5375
+	size_t outlen = inlen, pad_len = 0;
cf5375
+
cf5375
+	if (knet_h->sec_block_size) {
cf5375
+		/*
cf5375
+		 * if the crypto mechanism requires padding, calculate the padding
cf5375
+		 * and add it back to outlen because that's what the crypto layer
cf5375
+		 * would do.
cf5375
+		 */
cf5375
+		pad_len = knet_h->sec_block_size - (outlen % knet_h->sec_block_size);
cf5375
+
cf5375
+		outlen = outlen + pad_len;
cf5375
+	}
cf5375
+
cf5375
+	return outlen + knet_h->sec_salt_size + knet_h->sec_hash_size;
cf5375
+}
cf5375
+
cf5375
+/*
cf5375
+ * this function takes in the data that we would like to send
cf5375
+ * and tells us what is the real maximum data we can send
cf5375
+ * accounting for headers and crypto
cf5375
+ * calling thread needs to account for protocol overhead.
cf5375
+ */
cf5375
+
cf5375
+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen)
cf5375
+{
cf5375
+	size_t outlen = inlen, pad_len = 0;
cf5375
+
cf5375
+	if (knet_h->sec_block_size) {
cf5375
+		/*
cf5375
+		 * drop both salt and hash, that leaves only the crypto data and padding
cf5375
+		 * we need to calculate the padding based on the real encrypted data
cf5375
+		 * that includes the knet_header.
cf5375
+		 */
cf5375
+		outlen = outlen - (knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
+
cf5375
+		/*
cf5375
+		 * if the crypto mechanism requires padding, calculate the padding
cf5375
+		 * and remove it, to align the data.
cf5375
+		 * NOTE: we need to remove pad_len + 1 because, based on testing,
cf5375
+		 * if we send data that are already aligned to block_size, the
cf5375
+		 * crypto implementations will add another block_size!
cf5375
+		 * so we want to make sure that our data won't add an unnecessary
cf5375
+		 * block_size that we need to remove later.
cf5375
+		 */
cf5375
+		pad_len = outlen % knet_h->sec_block_size;
cf5375
+
cf5375
+		outlen = outlen - (pad_len + 1);
cf5375
+
cf5375
+		/*
cf5375
+		 * add both hash and salt size back, similar to padding above,
cf5375
+		 * the crypto layer will add them to the outlen
cf5375
+		 */
cf5375
+		outlen = outlen + (knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
+	}
cf5375
+
cf5375
+	/*
cf5375
+	 * drop KNET_HEADER_ALL_SIZE and the crypto headers (salt and hash)
cf5375
+	 * to provide a clean application MTU
cf5375
+	 */
cf5375
+	outlen = outlen - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
+
cf5375
+	return outlen;
cf5375
+}
cf5375
+
cf5375
+/*
cf5375
+ * set the lowest possible value as failsafe for all links.
cf5375
+ * KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6
cf5375
+ * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4
cf5375
+ * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD
cf5375
+ */
cf5375
+
cf5375
+size_t calc_min_mtu(knet_handle_t knet_h)
cf5375
+{
cf5375
+	return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD));
cf5375
+}
cf5375
diff --git a/libknet/onwire.h b/libknet/onwire.h
cf5375
index 9815bc3..74d4d09 100644
cf5375
--- a/libknet/onwire.h
cf5375
+++ b/libknet/onwire.h
cf5375
@@ -120,7 +120,9 @@ struct knet_header_payload_ping {
cf5375
 #define KNET_PMTUD_SIZE_V4 65535
cf5375
 #define KNET_PMTUD_SIZE_V6 KNET_PMTUD_SIZE_V4
cf5375
 
cf5375
-/* These two get the protocol-specific overheads added to them */
cf5375
+/*
cf5375
+ * IPv4/IPv6 header size
cf5375
+ */
cf5375
 #define KNET_PMTUD_OVERHEAD_V4 20
cf5375
 #define KNET_PMTUD_OVERHEAD_V6 40
cf5375
 
cf5375
@@ -199,4 +201,8 @@ struct knet_header {
cf5375
 #define KNET_HEADER_PMTUD_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud))
cf5375
 #define KNET_HEADER_DATA_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data))
cf5375
 
cf5375
+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen);
cf5375
+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen);
cf5375
+size_t calc_min_mtu(knet_handle_t knet_h);
cf5375
+
cf5375
 #endif
cf5375
diff --git a/libknet/tests/Makefile.am b/libknet/tests/Makefile.am
cf5375
index 3346596..9160780 100644
cf5375
--- a/libknet/tests/Makefile.am
cf5375
+++ b/libknet/tests/Makefile.am
cf5375
@@ -38,6 +38,12 @@ int_checks		= \
cf5375
 
cf5375
 fun_checks		=
cf5375
 
cf5375
+# checks below need to be executed manually
cf5375
+# or with a specific environment
cf5375
+
cf5375
+long_run_checks		= \
cf5375
+			  fun_pmtud_crypto_test
cf5375
+
cf5375
 benchmarks		= \
cf5375
 			  knet_bench_test
cf5375
 
cf5375
@@ -45,6 +51,7 @@ noinst_PROGRAMS		= \
cf5375
 			  api_knet_handle_new_limit_test \
cf5375
 			  pckt_test \
cf5375
 			  $(benchmarks) \
cf5375
+			  $(long_run_checks) \
cf5375
 			  $(check_PROGRAMS)
cf5375
 
cf5375
 noinst_SCRIPTS		= \
cf5375
@@ -71,6 +78,7 @@ int_links_acl_ip_test_SOURCES = int_links_acl_ip.c \
cf5375
 				../logging.c \
cf5375
 				../netutils.c \
cf5375
 				../threads_common.c \
cf5375
+				../onwire.c \
cf5375
 				../transports.c \
cf5375
 				../transport_common.c \
cf5375
 				../transport_loopback.c \
cf5375
@@ -88,4 +96,9 @@ knet_bench_test_SOURCES	= knet_bench.c \
cf5375
 			  ../logging.c \
cf5375
 			  ../compat.c \
cf5375
 			  ../transport_common.c \
cf5375
-			  ../threads_common.c
cf5375
+			  ../threads_common.c \
cf5375
+			  ../onwire.c
cf5375
+
cf5375
+fun_pmtud_crypto_test_SOURCES = fun_pmtud_crypto.c \
cf5375
+				test-common.c \
cf5375
+				../onwire.c
cf5375
diff --git a/libknet/tests/api_knet_send_crypto.c b/libknet/tests/api_knet_send_crypto.c
cf5375
index 11de857..5fc5463 100644
cf5375
--- a/libknet/tests/api_knet_send_crypto.c
cf5375
+++ b/libknet/tests/api_knet_send_crypto.c
cf5375
@@ -67,7 +67,7 @@ static void test(const char *model)
cf5375
 	memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg));
cf5375
 	strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1);
cf5375
 	strncpy(knet_handle_crypto_cfg.crypto_cipher_type, "aes128", sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1);
cf5375
-	strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha1", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
cf5375
+	strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha256", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
cf5375
 	knet_handle_crypto_cfg.private_key_len = 2000;
cf5375
 
cf5375
 	if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) {
cf5375
diff --git a/libknet/tests/fun_pmtud_crypto.c b/libknet/tests/fun_pmtud_crypto.c
cf5375
new file mode 100644
cf5375
index 0000000..91c062c
cf5375
--- /dev/null
cf5375
+++ b/libknet/tests/fun_pmtud_crypto.c
cf5375
@@ -0,0 +1,326 @@
cf5375
+/*
cf5375
+ * Copyright (C) 2019 Red Hat, Inc.  All rights reserved.
cf5375
+ *
cf5375
+ * Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
cf5375
+ *
cf5375
+ * This software licensed under GPL-2.0+
cf5375
+ */
cf5375
+
cf5375
+#include "config.h"
cf5375
+
cf5375
+#include <errno.h>
cf5375
+#include <stdio.h>
cf5375
+#include <stdlib.h>
cf5375
+#include <string.h>
cf5375
+#include <unistd.h>
cf5375
+#include <inttypes.h>
cf5375
+#include <sys/ioctl.h>
cf5375
+#include <net/ethernet.h>
cf5375
+#include <ifaddrs.h>
cf5375
+#include <net/if.h>
cf5375
+
cf5375
+#include "libknet.h"
cf5375
+
cf5375
+#include "compress.h"
cf5375
+#include "internals.h"
cf5375
+#include "netutils.h"
cf5375
+#include "onwire.h"
cf5375
+#include "test-common.h"
cf5375
+
cf5375
+static int private_data;
cf5375
+
cf5375
+static void sock_notify(void *pvt_data,
cf5375
+			int datafd,
cf5375
+			int8_t channel,
cf5375
+			uint8_t tx_rx,
cf5375
+			int error,
cf5375
+			int errorno)
cf5375
+{
cf5375
+	return;
cf5375
+}
cf5375
+
cf5375
+static int iface_fd = 0;
cf5375
+static int default_mtu = 0;
cf5375
+
cf5375
+#ifdef KNET_LINUX
cf5375
+const char *loopback = "lo";
cf5375
+#endif
cf5375
+#ifdef KNET_BSD
cf5375
+const char *loopback = "lo0";
cf5375
+#endif
cf5375
+
cf5375
+static int fd_init(void)
cf5375
+{
cf5375
+#ifdef KNET_LINUX
cf5375
+	return socket(AF_INET, SOCK_STREAM, 0);
cf5375
+#endif
cf5375
+#ifdef KNET_BSD
cf5375
+	return socket(AF_LOCAL, SOCK_DGRAM, 0);
cf5375
+#endif
cf5375
+	return -1;
cf5375
+}
cf5375
+
cf5375
+static int set_iface_mtu(uint32_t mtu)
cf5375
+{
cf5375
+	int err = 0;
cf5375
+	struct ifreq ifr;
cf5375
+
cf5375
+	memset(&ifr, 0, sizeof(struct ifreq));
cf5375
+	strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1);
cf5375
+	ifr.ifr_mtu = mtu;
cf5375
+
cf5375
+	err = ioctl(iface_fd, SIOCSIFMTU, &ifr);
cf5375
+
cf5375
+	return err;
cf5375
+}
cf5375
+
cf5375
+static int get_iface_mtu(void)
cf5375
+{
cf5375
+	int err = 0, savederrno = 0;
cf5375
+	struct ifreq ifr;
cf5375
+
cf5375
+	memset(&ifr, 0, sizeof(struct ifreq));
cf5375
+	strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1);
cf5375
+
cf5375
+	err = ioctl(iface_fd, SIOCGIFMTU, &ifr);
cf5375
+	if (err) {
cf5375
+		savederrno = errno;
cf5375
+		goto out_clean;
cf5375
+	}
cf5375
+
cf5375
+	err = ifr.ifr_mtu;
cf5375
+
cf5375
+out_clean:
cf5375
+	errno = savederrno;
cf5375
+	return err;
cf5375
+}
cf5375
+
cf5375
+static int exit_local(int code)
cf5375
+{
cf5375
+	set_iface_mtu(default_mtu);
cf5375
+	close(iface_fd);
cf5375
+	iface_fd = 0;
cf5375
+	exit(code);
cf5375
+}
cf5375
+
cf5375
+static void test_mtu(const char *model, const char *crypto, const char *hash)
cf5375
+{
cf5375
+	knet_handle_t knet_h;
cf5375
+	int logfds[2];
cf5375
+	int datafd = 0;
cf5375
+	int8_t channel = 0;
cf5375
+	struct sockaddr_storage lo;
cf5375
+	struct knet_handle_crypto_cfg knet_handle_crypto_cfg;
cf5375
+	unsigned int data_mtu, expected_mtu;
cf5375
+	size_t calculated_iface_mtu = 0, detected_iface_mtu = 0;
cf5375
+
cf5375
+	if (make_local_sockaddr(&lo, 0) < 0) {
cf5375
+		printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno));
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	setup_logpipes(logfds);
cf5375
+
cf5375
+	knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG);
cf5375
+
cf5375
+	flush_logs(logfds[0], stdout);
cf5375
+
cf5375
+	printf("Test knet_send with %s and valid data\n", model);
cf5375
+
cf5375
+	memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg));
cf5375
+	strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1);
cf5375
+	strncpy(knet_handle_crypto_cfg.crypto_cipher_type, crypto, sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1);
cf5375
+	strncpy(knet_handle_crypto_cfg.crypto_hash_type, hash, sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
cf5375
+	knet_handle_crypto_cfg.private_key_len = 2000;
cf5375
+
cf5375
+	if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) {
cf5375
+		printf("knet_handle_crypto failed with correct config: %s\n", strerror(errno));
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+        }
cf5375
+
cf5375
+	if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) {
cf5375
+		printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno));
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+        }
cf5375
+
cf5375
+	datafd = 0;
cf5375
+	channel = -1;
cf5375
+
cf5375
+	if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) {
cf5375
+		printf("knet_handle_add_datafd failed: %s\n", strerror(errno));
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	if (knet_host_add(knet_h, 1) < 0) {
cf5375
+		printf("knet_host_add failed: %s\n", strerror(errno));
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	if (knet_link_set_config(knet_h, 1, 0, KNET_TRANSPORT_UDP, &lo, &lo, 0) < 0) {
cf5375
+		printf("Unable to configure link: %s\n", strerror(errno));
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	if (knet_link_set_pong_count(knet_h, 1, 0, 1) < 0) {
cf5375
+		printf("knet_link_set_pong_count failed: %s\n", strerror(errno));
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	if (knet_link_set_enable(knet_h, 1, 0, 1) < 0) {
cf5375
+		printf("knet_link_set_enable failed: %s\n", strerror(errno));
cf5375
+		knet_link_clear_config(knet_h, 1, 0);
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	if (wait_for_host(knet_h, 1, 4, logfds[0], stdout) < 0) {
cf5375
+		printf("timeout waiting for host to be reachable");
cf5375
+		knet_link_set_enable(knet_h, 1, 0, 0);
cf5375
+		knet_link_clear_config(knet_h, 1, 0);
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	flush_logs(logfds[0], stdout);
cf5375
+
cf5375
+	if (knet_handle_pmtud_get(knet_h, &data_mtu) < 0) {
cf5375
+		printf("knet_handle_pmtud_get failed error: %s\n", strerror(errno));
cf5375
+		knet_link_set_enable(knet_h, 1, 0, 0);
cf5375
+		knet_link_clear_config(knet_h, 1, 0);
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	calculated_iface_mtu = calc_data_outlen(knet_h, data_mtu + KNET_HEADER_ALL_SIZE) + 28;
cf5375
+	detected_iface_mtu = get_iface_mtu();
cf5375
+	/*
cf5375
+	 * 28 = 20 IP header + 8 UDP header
cf5375
+	 */
cf5375
+	expected_mtu = calc_max_data_outlen(knet_h, detected_iface_mtu - 28);
cf5375
+
cf5375
+	if (expected_mtu != data_mtu) {
cf5375
+		printf("Wrong MTU detected! interface mtu: %zu knet mtu: %u expected mtu: %u\n", detected_iface_mtu, data_mtu, expected_mtu);
cf5375
+		knet_link_set_enable(knet_h, 1, 0, 0);
cf5375
+		knet_link_clear_config(knet_h, 1, 0);
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	if ((detected_iface_mtu - calculated_iface_mtu) >= knet_h->sec_block_size) {
cf5375
+		printf("Wrong MTU detected! real iface mtu: %zu calculated: %zu\n", detected_iface_mtu, calculated_iface_mtu);
cf5375
+		knet_link_set_enable(knet_h, 1, 0, 0);
cf5375
+		knet_link_clear_config(knet_h, 1, 0);
cf5375
+		knet_host_remove(knet_h, 1);
cf5375
+		knet_handle_free(knet_h);
cf5375
+		flush_logs(logfds[0], stdout);
cf5375
+		close_logpipes(logfds);
cf5375
+		exit_local(FAIL);
cf5375
+	}
cf5375
+
cf5375
+	knet_link_set_enable(knet_h, 1, 0, 0);
cf5375
+	knet_link_clear_config(knet_h, 1, 0);
cf5375
+	knet_host_remove(knet_h, 1);
cf5375
+	knet_handle_free(knet_h);
cf5375
+	flush_logs(logfds[0], stdout);
cf5375
+	close_logpipes(logfds);
cf5375
+}
cf5375
+
cf5375
+static void test(const char *model, const char *crypto, const char *hash)
cf5375
+{
cf5375
+	int i = 576;
cf5375
+	int max = 65535;
cf5375
+
cf5375
+	while (i <= max) {
cf5375
+		printf("Setting interface MTU to: %i\n", i);
cf5375
+		set_iface_mtu(i);
cf5375
+		test_mtu(model, crypto, hash);
cf5375
+		if (i == max) {
cf5375
+			break;
cf5375
+		}
cf5375
+		i = i + 15;
cf5375
+		if (i > max) {
cf5375
+			i = max;
cf5375
+		}
cf5375
+	}
cf5375
+}
cf5375
+
cf5375
+int main(int argc, char *argv[])
cf5375
+{
cf5375
+	struct knet_crypto_info crypto_list[16];
cf5375
+	size_t crypto_list_entries;
cf5375
+
cf5375
+#ifdef KNET_BSD
cf5375
+	if (is_memcheck() || is_helgrind()) {
cf5375
+		printf("valgrind-freebsd cannot run this test properly. Skipping\n");
cf5375
+		return SKIP;
cf5375
+	}
cf5375
+#endif
cf5375
+
cf5375
+	if (geteuid() != 0) {
cf5375
+		printf("This test requires root privileges\n");
cf5375
+		return SKIP;
cf5375
+	}
cf5375
+
cf5375
+	iface_fd = fd_init();
cf5375
+	if (iface_fd < 0) {
cf5375
+		printf("fd_init failed: %s\n", strerror(errno));
cf5375
+		return FAIL;
cf5375
+	}
cf5375
+
cf5375
+	default_mtu = get_iface_mtu();
cf5375
+	if (default_mtu < 0) {
cf5375
+		printf("get_iface_mtu failed: %s\n", strerror(errno));
cf5375
+		return FAIL;
cf5375
+	}
cf5375
+
cf5375
+	memset(crypto_list, 0, sizeof(crypto_list));
cf5375
+
cf5375
+	if (knet_get_crypto_list(crypto_list, &crypto_list_entries) < 0) {
cf5375
+		printf("knet_get_crypto_list failed: %s\n", strerror(errno));
cf5375
+		return FAIL;
cf5375
+	}
cf5375
+
cf5375
+	if (crypto_list_entries == 0) {
cf5375
+		printf("no crypto modules detected. Skipping\n");
cf5375
+		return SKIP;
cf5375
+	}
cf5375
+
cf5375
+	test(crypto_list[0].name, "aes128", "sha1");
cf5375
+	test(crypto_list[0].name, "aes128", "sha256");
cf5375
+	test(crypto_list[0].name, "aes256", "sha1");
cf5375
+	test(crypto_list[0].name, "aes256", "sha256");
cf5375
+
cf5375
+	exit_local(PASS);
cf5375
+}
cf5375
diff --git a/libknet/threads_common.c b/libknet/threads_common.c
cf5375
index 1f3e1e3..03edfc4 100644
cf5375
--- a/libknet/threads_common.c
cf5375
+++ b/libknet/threads_common.c
cf5375
@@ -161,7 +161,7 @@ void force_pmtud_run(knet_handle_t knet_h, uint8_t subsystem, uint8_t reset_mtu)
cf5375
 {
cf5375
 	if (reset_mtu) {
cf5375
 		log_debug(knet_h, subsystem, "PMTUd has been reset to default");
cf5375
-		knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
cf5375
+		knet_h->data_mtu = calc_min_mtu(knet_h);
cf5375
 		if (knet_h->pmtud_notify_fn) {
cf5375
 			knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data,
cf5375
 						knet_h->data_mtu);
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index 1a19806..1dd1788 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -25,16 +25,16 @@
cf5375
 static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link)
cf5375
 {
cf5375
 	int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once;
cf5375
-	uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */
cf5375
-	size_t onwire_len;   /* current packet onwire size */
cf5375
-	size_t overhead_len; /* onwire packet overhead (protocol based) */
cf5375
-	size_t max_mtu_len;  /* max mtu for protocol */
cf5375
-	size_t data_len;     /* how much data we can send in the packet
cf5375
-			      * generally would be onwire_len - overhead_len
cf5375
-			      * needs to be adjusted for crypto
cf5375
-			      */
cf5375
-	size_t pad_len;	     /* crypto packet pad size, needs to move into crypto.c callbacks */
cf5375
-	ssize_t len;	     /* len of what we were able to sendto onwire */
cf5375
+	uint32_t kernel_mtu;		/* record kernel_mtu from EMSGSIZE */
cf5375
+	size_t onwire_len;   		/* current packet onwire size */
cf5375
+	size_t ipproto_overhead_len;	/* onwire packet overhead (protocol based) */
cf5375
+	size_t max_mtu_len;		/* max mtu for protocol */
cf5375
+	size_t data_len;		/* how much data we can send in the packet
cf5375
+					 * generally would be onwire_len - ipproto_overhead_len
cf5375
+					 * needs to be adjusted for crypto
cf5375
+					 */
cf5375
+	size_t app_mtu_len;		/* real data that we can send onwire */
cf5375
+	ssize_t len;			/* len of what we were able to sendto onwire */
cf5375
 
cf5375
 	struct timespec ts;
cf5375
 	unsigned long long pong_timeout_adj_tmp;
cf5375
@@ -45,20 +45,16 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
cf5375
 	mutex_retry_limit = 0;
cf5375
 	failsafe = 0;
cf5375
 
cf5375
-	dst_link->last_bad_mtu = 0;
cf5375
-
cf5375
 	knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id;
cf5375
 
cf5375
 	switch (dst_link->dst_addr.ss_family) {
cf5375
 		case AF_INET6:
cf5375
 			max_mtu_len = KNET_PMTUD_SIZE_V6;
cf5375
-			overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead;
cf5375
-			dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len;
cf5375
+			ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead;
cf5375
 			break;
cf5375
 		case AF_INET:
cf5375
 			max_mtu_len = KNET_PMTUD_SIZE_V4;
cf5375
-			overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead;
cf5375
-			dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len;
cf5375
+			ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead;
cf5375
 			break;
cf5375
 		default:
cf5375
 			log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unknown protocol");
cf5375
@@ -66,6 +62,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
cf5375
 			break;
cf5375
 	}
cf5375
 
cf5375
+	dst_link->last_bad_mtu = 0;
cf5375
+	dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len;
cf5375
+
cf5375
 	/*
cf5375
 	 * discovery starts from the top because kernel will
cf5375
 	 * refuse to send packets > current iface mtu.
cf5375
@@ -92,107 +91,39 @@ restart:
cf5375
 	}
cf5375
 
cf5375
 	/*
cf5375
-	 * unencrypted packet looks like:
cf5375
-	 *
cf5375
-	 * | ip | protocol | knet_header | unencrypted data                                  |
cf5375
-	 * | onwire_len                                                                      |
cf5375
-	 * | overhead_len  |
cf5375
-	 *                 | data_len                                                        |
cf5375
-	 *                               | app MTU                                           |
cf5375
-	 *
cf5375
-	 * encrypted packet looks like (not to scale):
cf5375
-	 *
cf5375
-	 * | ip | protocol | salt | crypto(knet_header | data)      | crypto_data_pad | hash |
cf5375
-	 * | onwire_len                                                                      |
cf5375
-	 * | overhead_len  |
cf5375
-	 *                 | data_len                                                        |
cf5375
-	 *                                             | app MTU    |
cf5375
-	 *
cf5375
-	 * knet_h->sec_block_size is >= 0 if encryption will pad the data
cf5375
-	 * knet_h->sec_salt_size is >= 0 if encryption is enabled
cf5375
-	 * knet_h->sec_hash_size is >= 0 if signing is enabled
cf5375
+	 * common to all packets
cf5375
 	 */
cf5375
 
cf5375
 	/*
cf5375
-	 * common to all packets
cf5375
+	 * calculate the application MTU based on current onwire_len minus ipproto_overhead_len
cf5375
 	 */
cf5375
-	data_len = onwire_len - overhead_len;
cf5375
-
cf5375
-	if (knet_h->crypto_instance) {
cf5375
 
cf5375
-realign:
cf5375
-		if (knet_h->sec_block_size) {
cf5375
+	app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len);
cf5375
 
cf5375
-			/*
cf5375
-			 * drop both salt and hash, that leaves only the crypto data and padding
cf5375
-			 * we need to calculate the padding based on the real encrypted data.
cf5375
-			 */
cf5375
-			data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
-
cf5375
-			/*
cf5375
-			 * if the crypto mechanism requires padding, calculate the padding
cf5375
-			 * and add it back to data_len because that's what the crypto layer
cf5375
-			 * would do.
cf5375
-			 */
cf5375
-			pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size);
cf5375
-
cf5375
-			/*
cf5375
-			 * if are at the boundary, reset padding
cf5375
-			 */
cf5375
-			if (pad_len == knet_h->sec_block_size) {
cf5375
-				pad_len = 0;
cf5375
-			}
cf5375
-			data_len = data_len + pad_len;
cf5375
-
cf5375
-			/*
cf5375
-			 * if our current data_len is higher than max_mtu_len
cf5375
-			 * then we need to reduce by padding size (that is our
cf5375
-			 * increment / decrement value)
cf5375
-			 *
cf5375
-			 * this generally happens only on the first PMTUd run
cf5375
-			 */
cf5375
-			while (data_len + overhead_len >= max_mtu_len) {
cf5375
-				data_len = data_len - knet_h->sec_block_size;
cf5375
-			}
cf5375
+	/*
cf5375
+	 * recalculate onwire_len, since it might differ depending
cf5375
+	 * on the data padding added by the crypto layer.
cf5375
+	 */
cf5375
 
cf5375
-			/*
cf5375
-			 * add both hash and salt size back, similar to padding above,
cf5375
-			 * the crypto layer will add them to the data_len
cf5375
-			 */
cf5375
-			data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size);
cf5375
-		}
cf5375
+	onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len;
cf5375
 
cf5375
-		if (dst_link->last_bad_mtu) {
cf5375
-			if (data_len + overhead_len >= dst_link->last_bad_mtu) {
cf5375
-				/*
cf5375
-				 * reduce data_len to something lower than last_bad_mtu, overhead_len
cf5375
-				 * and sec_block_size (decrementing step) - 1 (granularity)
cf5375
-				 */
cf5375
-				data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1;
cf5375
-				if (knet_h->sec_block_size) {
cf5375
-					/*
cf5375
-					 * make sure that data_len is aligned to the sec_block_size boundary
cf5375
-					 */
cf5375
-					goto realign;
cf5375
-				}
cf5375
-			}
cf5375
-		}
cf5375
+	/*
cf5375
+	 * calculate the size of what we need to send to sendto(2).
cf5375
+	 * see also onwire.c for packet format explanation.
cf5375
+	 */
cf5375
+	data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE;
cf5375
 
cf5375
-		if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size) + 1) {
cf5375
+	if (knet_h->crypto_instance) {
cf5375
+		if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) {
cf5375
 			log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)");
cf5375
 			return -1;
cf5375
 		}
cf5375
 
cf5375
-		/*
cf5375
-		 * recalculate onwire_len based on crypto information
cf5375
-		 * and place it in the PMTUd packet info
cf5375
-		 */
cf5375
-		onwire_len = data_len + overhead_len;
cf5375
 		knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
cf5375
 
cf5375
 		if (crypto_encrypt_and_sign(knet_h,
cf5375
 					    (const unsigned char *)knet_h->pmtudbuf,
cf5375
-					    data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size),
cf5375
+					    data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size),
cf5375
 					    knet_h->pmtudbuf_crypt,
cf5375
 					    (ssize_t *)&data_len) < 0) {
cf5375
 			log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet");
cf5375
@@ -201,11 +132,8 @@ realign:
cf5375
 
cf5375
 		outbuf = knet_h->pmtudbuf_crypt;
cf5375
 		knet_h->stats_extra.tx_crypt_pmtu_packets++;
cf5375
-
cf5375
 	} else {
cf5375
-
cf5375
 		knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
cf5375
-
cf5375
 	}
cf5375
 
cf5375
 	/* link has gone down, aborting pmtud */
cf5375
@@ -417,7 +345,7 @@ retry:
cf5375
 				/*
cf5375
 				 * account for IP overhead, knet headers and crypto in PMTU calculation
cf5375
 				 */
cf5375
-				dst_link->status.mtu = onwire_len - dst_link->status.proto_overhead;
cf5375
+				dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len);
cf5375
 				pthread_mutex_unlock(&knet_h->pmtud_mutex);
cf5375
 				return 0;
cf5375
 			}
cf5375
@@ -437,7 +365,7 @@ retry:
cf5375
 	goto restart;
cf5375
 }
cf5375
 
cf5375
-static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, unsigned int *min_mtu, int force_run)
cf5375
+static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run)
cf5375
 {
cf5375
 	uint8_t saved_valid_pmtud;
cf5375
 	unsigned int saved_pmtud;
cf5375
@@ -455,17 +383,22 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
cf5375
 		timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud);
cf5375
 
cf5375
 		if (diff_pmtud < interval) {
cf5375
-			*min_mtu = dst_link->status.mtu;
cf5375
 			return dst_link->has_valid_mtu;
cf5375
 		}
cf5375
 	}
cf5375
 
cf5375
+	/*
cf5375
+	 * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers
cf5375
+	 *
cf5375
+	 * please note that it is not the same as link->proto_overhead that
cf5375
+	 * includes only either UDP or SCTP (at the moment) overhead.
cf5375
+	 */
cf5375
 	switch (dst_link->dst_addr.ss_family) {
cf5375
 		case AF_INET6:
cf5375
-			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size;
cf5375
+			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size;
cf5375
 			break;
cf5375
 		case AF_INET:
cf5375
-			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size;
cf5375
+			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size;
cf5375
 			break;
cf5375
 	}
cf5375
 
cf5375
@@ -486,26 +419,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
cf5375
 		dst_link->has_valid_mtu = 0;
cf5375
 	} else {
cf5375
 		dst_link->has_valid_mtu = 1;
cf5375
-		switch (dst_link->dst_addr.ss_family) {
cf5375
-			case AF_INET6:
cf5375
-				if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V6) ||
cf5375
-				    ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V6)) {
cf5375
-					log_debug(knet_h, KNET_SUB_PMTUD,
cf5375
-						  "PMTUD detected an IPv6 MTU out of bound value (%u) for host: %u link: %u.",
cf5375
-						  dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id);
cf5375
-					dst_link->has_valid_mtu = 0;
cf5375
-				}
cf5375
-				break;
cf5375
-			case AF_INET:
cf5375
-				if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V4) ||
cf5375
-				    ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V4)) {
cf5375
-					log_debug(knet_h, KNET_SUB_PMTUD,
cf5375
-						  "PMTUD detected an IPv4 MTU out of bound value (%u) for host: %u link: %u.",
cf5375
-						  dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id);
cf5375
-					dst_link->has_valid_mtu = 0;
cf5375
-				}
cf5375
-				break;
cf5375
-		}
cf5375
 		if (dst_link->has_valid_mtu) {
cf5375
 			if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) {
cf5375
 				log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u",
cf5375
@@ -513,9 +426,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
cf5375
 			}
cf5375
 			log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u",
cf5375
 				  dst_host->host_id, dst_link->link_id, dst_link->status.mtu);
cf5375
-			if (dst_link->status.mtu < *min_mtu) {
cf5375
-				*min_mtu = dst_link->status.mtu;
cf5375
-			}
cf5375
 
cf5375
 			/*
cf5375
 			 * set pmtud_last, if we can, after we are done with the PMTUd process
cf5375
@@ -541,14 +451,14 @@ void *_handle_pmtud_link_thread(void *data)
cf5375
 	struct knet_host *dst_host;
cf5375
 	struct knet_link *dst_link;
cf5375
 	int link_idx;
cf5375
-	unsigned int min_mtu, have_mtu;
cf5375
+	unsigned int have_mtu;
cf5375
 	unsigned int lower_mtu;
cf5375
 	int link_has_mtu;
cf5375
 	int force_run = 0;
cf5375
 
cf5375
 	set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED);
cf5375
 
cf5375
-	knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
cf5375
+	knet_h->data_mtu = calc_min_mtu(knet_h);
cf5375
 
cf5375
 	/* preparing pmtu buffer */
cf5375
 	knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION;
cf5375
@@ -578,7 +488,6 @@ void *_handle_pmtud_link_thread(void *data)
cf5375
 		}
cf5375
 
cf5375
 		lower_mtu = KNET_PMTUD_SIZE_V4;
cf5375
-		min_mtu = KNET_PMTUD_SIZE_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
cf5375
 		have_mtu = 0;
cf5375
 
cf5375
 		for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) {
cf5375
@@ -593,14 +502,14 @@ void *_handle_pmtud_link_thread(void *data)
cf5375
 				     (dst_link->status.dynconnected != 1)))
cf5375
 					continue;
cf5375
 
cf5375
-				link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, &min_mtu, force_run);
cf5375
+				link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, force_run);
cf5375
 				if (errno == EDEADLK) {
cf5375
 					goto out_unlock;
cf5375
 				}
cf5375
 				if (link_has_mtu) {
cf5375
 					have_mtu = 1;
cf5375
-					if (min_mtu < lower_mtu) {
cf5375
-						lower_mtu = min_mtu;
cf5375
+					if (dst_link->status.mtu < lower_mtu) {
cf5375
+						lower_mtu = dst_link->status.mtu;
cf5375
 					}
cf5375
 				}
cf5375
 			}
cf5375
commit 499f589404db791d8e68c84c8ba3a857aeea5083
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Tue Aug 13 06:41:32 2019 +0200
cf5375
cf5375
    [PMTUd] add dynamic pong timeout when using crypto
cf5375
    
cf5375
    Problem originally reported by the Proxmox community: users
cf5375
    observed that, under pressure, the MTU would flap back and forth
cf5375
    between 2 values because the other node's response timed out.
cf5375
    
cf5375
    Implement a dynamic timeout multiplier, used when crypto is enabled,
cf5375
    that should solve the problem in a more flexible fashion.
cf5375
    
cf5375
    When a timeout hits, these new log messages are shown:
cf5375
    
cf5375
    [knet]: [info] host: host: 1 (passive) best link: 0 (pri: 0)
cf5375
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
cf5375
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (4) for host 1 link: 0
cf5375
    [knet]: [info] pmtud: PMTUD link change for host: 1 link: 0 from 469 to 65429
cf5375
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
cf5375
    [knet]: [info] pmtud: Global data MTU changed to: 65429
cf5375
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
cf5375
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (8) for host 1 link: 0
cf5375
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (16) for host 1 link: 0
cf5375
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (32) for host 1 link: 0
cf5375
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (64) for host 1 link: 0
cf5375
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
cf5375
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
cf5375
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (128) for host 1 link: 0
cf5375
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
cf5375
    
cf5375
    and when the latency reduces and it is safe to be more responsive again:
cf5375
    
cf5375
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
cf5375
    [knet]: [debug] pmtud: Decreasing PMTUd response timeout multiplier to (64) for host 1 link: 0
cf5375
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
cf5375
    
cf5375
    ....
cf5375
    
cf5375
    Testing this patch on normal hosts is a bit challenging, though.
cf5375
    
cf5375
    The patch was tested by hardcoding a very low timeout
cf5375
    and using a long-running version of api_knet_send_crypto_test with a short PMTUd setfreq (10 sec).
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/internals.h b/libknet/internals.h
cf5375
index 31840e4..d1a4757 100644
cf5375
--- a/libknet/internals.h
cf5375
+++ b/libknet/internals.h
cf5375
@@ -80,6 +80,7 @@ struct knet_link {
cf5375
 	uint32_t last_bad_mtu;
cf5375
 	uint32_t last_sent_mtu;
cf5375
 	uint32_t last_recv_mtu;
cf5375
+	uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */
cf5375
 	uint8_t has_valid_mtu;
cf5375
 };
cf5375
 
cf5375
diff --git a/libknet/links.c b/libknet/links.c
cf5375
index 03e0af9..f7eccc3 100644
cf5375
--- a/libknet/links.c
cf5375
+++ b/libknet/links.c
cf5375
@@ -219,6 +219,7 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
cf5375
 		}
cf5375
 	}
cf5375
 
cf5375
+	link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN;
cf5375
 	link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT;
cf5375
 	link->has_valid_mtu = 0;
cf5375
 	link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */
cf5375
diff --git a/libknet/links.h b/libknet/links.h
cf5375
index e14958d..c8ca610 100644
cf5375
--- a/libknet/links.h
cf5375
+++ b/libknet/links.h
cf5375
@@ -30,6 +30,16 @@
cf5375
  */
cf5375
 #define KNET_LINK_PONG_TIMEOUT_LAT_MUL	2
cf5375
 
cf5375
+/*
cf5375
+ * under heavy load with crypto enabled, it takes a much
cf5375
+ * longer time to receive a response from the other node.
cf5375
+ *
cf5375
+ * 128 is a somewhat arbitrary number, but we want to set a limit
cf5375
+ * and report failures after that.
cf5375
+ */
cf5375
+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN	  2
cf5375
+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX	128
cf5375
+
cf5375
 int _link_updown(knet_handle_t knet_h, knet_node_id_t node_id, uint8_t link_id,
cf5375
 		 unsigned int enabled, unsigned int connected);
cf5375
 
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index 1dd1788..d342697 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -36,8 +36,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
cf5375
 	size_t app_mtu_len;		/* real data that we can send onwire */
cf5375
 	ssize_t len;			/* len of what we were able to sendto onwire */
cf5375
 
cf5375
-	struct timespec ts;
cf5375
-	unsigned long long pong_timeout_adj_tmp;
cf5375
+	struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts;
cf5375
+	unsigned long long pong_timeout_adj_tmp, timediff;
cf5375
+	int pmtud_crypto_reduce = 1;
cf5375
 	unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf;
cf5375
 
cf5375
 	warn_once = 0;
cf5375
@@ -242,6 +243,15 @@ retry:
cf5375
 			return -1;
cf5375
 		}
cf5375
 
cf5375
+		/*
cf5375
+		 * non-fatal: we can wait for the next round to reduce the
cf5375
+		 * multiplier
cf5375
+		 */
cf5375
+		if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) {
cf5375
+			log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno));
cf5375
+			pmtud_crypto_reduce = 0;
cf5375
+		}
cf5375
+
cf5375
 		/*
cf5375
 		 * set PMTUd reply timeout to match pong_timeout on a given link
cf5375
 		 *
cf5375
@@ -261,7 +271,7 @@ retry:
cf5375
 			/*
cf5375
 			 * crypto, under pressure, is a royal PITA
cf5375
 			 */
cf5375
-			pong_timeout_adj_tmp = dst_link->pong_timeout_adj * 2;
cf5375
+			pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier;
cf5375
 		} else {
cf5375
 			pong_timeout_adj_tmp = dst_link->pong_timeout_adj;
cf5375
 		}
cf5375
@@ -295,6 +305,17 @@ retry:
cf5375
 
cf5375
 		if (ret) {
cf5375
 			if (ret == ETIMEDOUT) {
cf5375
+				if ((knet_h->crypto_instance) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) {
cf5375
+					dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2;
cf5375
+					pmtud_crypto_reduce = 0;
cf5375
+					log_debug(knet_h, KNET_SUB_PMTUD,
cf5375
+							"Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u",
cf5375
+							dst_link->pmtud_crypto_timeout_multiplier,
cf5375
+							dst_host->host_id,
cf5375
+							dst_link->link_id);
cf5375
+					pthread_mutex_unlock(&knet_h->pmtud_mutex);
cf5375
+					goto restart;
cf5375
+				}
cf5375
 				if (!warn_once) {
cf5375
 					log_warn(knet_h, KNET_SUB_PMTUD,
cf5375
 							"possible MTU misconfiguration detected. "
cf5375
@@ -323,6 +344,23 @@ retry:
cf5375
 			}
cf5375
 		}
cf5375
 
cf5375
+		if ((knet_h->crypto_instance) && (pmtud_crypto_reduce == 1) &&
cf5375
+		    (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) {
cf5375
+			if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) {
cf5375
+				timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff);
cf5375
+				if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) {
cf5375
+					dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2;
cf5375
+					log_debug(knet_h, KNET_SUB_PMTUD,
cf5375
+							"Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u",
cf5375
+							dst_link->pmtud_crypto_timeout_multiplier,
cf5375
+							dst_host->host_id,
cf5375
+							dst_link->link_id);
cf5375
+				}
cf5375
+			} else {
cf5375
+				log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno));
cf5375
+			}
cf5375
+		}
cf5375
+
cf5375
 		if ((dst_link->last_recv_mtu != onwire_len) || (ret)) {
cf5375
 			dst_link->last_bad_mtu = onwire_len;
cf5375
 		} else {
cf5375
commit 5f3476849523e9ee486481b429b471a1ab3cac20
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Thu Jul 18 07:50:37 2019 +0200
cf5375
cf5375
    [handle] make sure that the pmtud buf contains at least knet header size
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/handle.c b/libknet/handle.c
cf5375
index 4835e99..1fb9c9b 100644
cf5375
--- a/libknet/handle.c
cf5375
+++ b/libknet/handle.c
cf5375
@@ -234,14 +234,14 @@ static int _init_buffers(knet_handle_t knet_h)
cf5375
 	}
cf5375
 	memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE);
cf5375
 
cf5375
-	knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6);
cf5375
+	knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE);
cf5375
 	if (!knet_h->pmtudbuf) {
cf5375
 		savederrno = errno;
cf5375
 		log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s",
cf5375
 			strerror(savederrno));
cf5375
 		goto exit_fail;
cf5375
 	}
cf5375
-	memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6);
cf5375
+	memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE);
cf5375
 
cf5375
 	for (i = 0; i < PCKT_FRAG_MAX; i++) {
cf5375
 		bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD;
cf5375
commit 3b3b6d2a7e1fee7eb41c6bacc1005ff90f7dd5cb
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Thu Jul 18 10:23:14 2019 +0200
cf5375
cf5375
    [tests] fix knet_bench coverity errors
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c
cf5375
index dfe5238..dc04239 100644
cf5375
--- a/libknet/tests/knet_bench.c
cf5375
+++ b/libknet/tests/knet_bench.c
cf5375
@@ -277,22 +277,24 @@ static void setup_knet(int argc, char *argv[])
cf5375
 					printf("Error: -p can only be specified once\n");
cf5375
 					exit(FAIL);
cf5375
 				}
cf5375
-				policystr = optarg;
cf5375
-				if (!strcmp(policystr, "active")) {
cf5375
-					policy = KNET_LINK_POLICY_ACTIVE;
cf5375
-					policyfound = 1;
cf5375
-				}
cf5375
-				/*
cf5375
-				 * we can't use rr because clangs can't compile
cf5375
-				 * an array of 3 strings, one of which is 2 bytes long
cf5375
-				 */
cf5375
-				if (!strcmp(policystr, "round-robin")) {
cf5375
-					policy = KNET_LINK_POLICY_RR;
cf5375
-					policyfound = 1;
cf5375
-				}
cf5375
-				if (!strcmp(policystr, "passive")) {
cf5375
-					policy = KNET_LINK_POLICY_PASSIVE;
cf5375
-					policyfound = 1;
cf5375
+				if (optarg) {
cf5375
+					policystr = optarg;
cf5375
+					if (!strcmp(policystr, "active")) {
cf5375
+						policy = KNET_LINK_POLICY_ACTIVE;
cf5375
+						policyfound = 1;
cf5375
+					}
cf5375
+					/*
cf5375
+					 * we can't use rr because clangs can't compile
cf5375
+					 * an array of 3 strings, one of which is 2 bytes long
cf5375
+					 */
cf5375
+					if (!strcmp(policystr, "round-robin")) {
cf5375
+						policy = KNET_LINK_POLICY_RR;
cf5375
+						policyfound = 1;
cf5375
+					}
cf5375
+					if (!strcmp(policystr, "passive")) {
cf5375
+						policy = KNET_LINK_POLICY_PASSIVE;
cf5375
+						policyfound = 1;
cf5375
+					}
cf5375
 				}
cf5375
 				if (!policyfound) {
cf5375
 					printf("Error: invalid policy %s specified. -p accepts active|passive|rr\n", policystr);
cf5375
@@ -304,14 +306,16 @@ static void setup_knet(int argc, char *argv[])
cf5375
 					printf("Error: -P can only be specified once\n");
cf5375
 					exit(FAIL);
cf5375
 				}
cf5375
-				protostr = optarg;
cf5375
-				if (!strcmp(protostr, "UDP")) {
cf5375
-					protocol = KNET_TRANSPORT_UDP;
cf5375
-					protofound = 1;
cf5375
-				}
cf5375
-				if (!strcmp(protostr, "SCTP")) {
cf5375
-					protocol = KNET_TRANSPORT_SCTP;
cf5375
-					protofound = 1;
cf5375
+				if (optarg) {
cf5375
+					protostr = optarg;
cf5375
+					if (!strcmp(protostr, "UDP")) {
cf5375
+						protocol = KNET_TRANSPORT_UDP;
cf5375
+						protofound = 1;
cf5375
+					}
cf5375
+					if (!strcmp(protostr, "SCTP")) {
cf5375
+						protocol = KNET_TRANSPORT_SCTP;
cf5375
+						protofound = 1;
cf5375
+					}
cf5375
 				}
cf5375
 				if (!protofound) {
cf5375
 					printf("Error: invalid protocol %s specified. -P accepts udp|sctp\n", policystr);
cf5375
@@ -380,17 +384,22 @@ static void setup_knet(int argc, char *argv[])
cf5375
 				}
cf5375
 				break;
cf5375
 			case 'T':
cf5375
-				if (!strcmp("ping", optarg)) {
cf5375
-					test_type = TEST_PING;
cf5375
-				}
cf5375
-				if (!strcmp("ping_data", optarg)) {
cf5375
-					test_type = TEST_PING_AND_DATA;
cf5375
-				}
cf5375
-				if (!strcmp("perf-by-size", optarg)) {
cf5375
-					test_type = TEST_PERF_BY_SIZE;
cf5375
-				}
cf5375
-				if (!strcmp("perf-by-time", optarg)) {
cf5375
-					test_type = TEST_PERF_BY_TIME;
cf5375
+				if (optarg) {
cf5375
+					if (!strcmp("ping", optarg)) {
cf5375
+						test_type = TEST_PING;
cf5375
+					}
cf5375
+					if (!strcmp("ping_data", optarg)) {
cf5375
+						test_type = TEST_PING_AND_DATA;
cf5375
+					}
cf5375
+					if (!strcmp("perf-by-size", optarg)) {
cf5375
+						test_type = TEST_PERF_BY_SIZE;
cf5375
+					}
cf5375
+					if (!strcmp("perf-by-time", optarg)) {
cf5375
+						test_type = TEST_PERF_BY_TIME;
cf5375
+					}
cf5375
+				} else {
cf5375
+					printf("Error: -T requires an option\n");
cf5375
+					exit(FAIL);
cf5375
 				}
cf5375
 				break;
cf5375
 			case 'S':
cf5375
@@ -957,15 +966,14 @@ static void display_stats(int level)
cf5375
 	struct knet_link_stats total_link_stats;
cf5375
 	knet_node_id_t host_list[KNET_MAX_HOST];
cf5375
 	uint8_t link_list[KNET_MAX_LINK];
cf5375
-	int res;
cf5375
 	unsigned int i,j;
cf5375
 	size_t num_hosts, num_links;
cf5375
 
cf5375
-	res = knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats));
cf5375
-	if (res) {
cf5375
+	if (knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)) < 0) {
cf5375
 		perror("[info]: failed to get knet handle stats");
cf5375
 		return;
cf5375
 	}
cf5375
+
cf5375
 	if (compresscfg || cryptocfg) {
cf5375
 		printf("\n");
cf5375
 		printf("[stat]: handle stats\n");
cf5375
@@ -1005,8 +1013,7 @@ static void display_stats(int level)
cf5375
 
cf5375
 	memset(&total_link_stats, 0, sizeof(struct knet_link_stats));
cf5375
 
cf5375
-	res = knet_host_get_host_list(knet_h, host_list, &num_hosts);
cf5375
-	if (res) {
cf5375
+	if (knet_host_get_host_list(knet_h, host_list, &num_hosts) < 0) {
cf5375
 		perror("[info]: cannot get host list for stats");
cf5375
 		return;
cf5375
 	}
cf5375
@@ -1015,18 +1022,16 @@ static void display_stats(int level)
cf5375
 	qsort(host_list, num_hosts, sizeof(uint16_t), node_compare);
cf5375
 
cf5375
 	for (j=0; j<num_hosts; j++) {
cf5375
-		res = knet_link_get_link_list(knet_h, host_list[j], link_list, &num_links);
cf5375
-		if (res) {
cf5375
+		if (knet_link_get_link_list(knet_h, host_list[j], link_list, &num_links) < 0) {
cf5375
 			perror("[info]: cannot get link list for stats");
cf5375
 			return;
cf5375
 		}
cf5375
 
cf5375
 		for (i=0; i < num_links; i++) {
cf5375
-			res = knet_link_get_status(knet_h,
cf5375
-						   host_list[j],
cf5375
-						   link_list[i],
cf5375
-						   &link_status,
cf5375
-						   sizeof(link_status));
cf5375
+			if (knet_link_get_status(knet_h, host_list[j], link_list[i], &link_status, sizeof(link_status)) < 0) {
cf5375
+				perror("[info]: cannot get link status");
cf5375
+				return;
cf5375
+			}
cf5375
 
cf5375
 			total_link_stats.tx_data_packets += link_status.stats.tx_data_packets;
cf5375
 			total_link_stats.rx_data_packets += link_status.stats.rx_data_packets;
cf5375
commit d74380a82c00716aafb780f5602182fce90d381f
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Wed Jul 24 08:38:56 2019 +0200
cf5375
cf5375
    [PMTUd] do not double unlock global read lock
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index d342697..f884760 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -297,7 +297,11 @@ retry:
cf5375
 			return -1;
cf5375
 		}
cf5375
 
cf5375
-		if (shutdown_in_progress(knet_h)) {
cf5375
+		/*
cf5375
+		 * we cannot use shutdown_in_progress in here because
cf5375
+		 * we already hold the read lock
cf5375
+		 */
cf5375
+		if (knet_h->fini_in_progress) {
cf5375
 			pthread_mutex_unlock(&knet_h->pmtud_mutex);
cf5375
 			log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress");
cf5375
 			return -1;
cf5375
commit 01242c683b18b813a67c13d3fc0546fec34f9f7c
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Mon Sep 9 15:11:25 2019 +0200
cf5375
cf5375
    [pmtud] switch to use async version of dstcache update due to locking context (read vs write)
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index f884760..d10984f 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -481,7 +481,7 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
cf5375
 	}
cf5375
 
cf5375
 	if (saved_valid_pmtud != dst_link->has_valid_mtu) {
cf5375
-		_host_dstcache_update_sync(knet_h, dst_host);
cf5375
+		_host_dstcache_update_async(knet_h, dst_host);
cf5375
 	}
cf5375
 
cf5375
 	return dst_link->has_valid_mtu;
cf5375
commit a70f0adf0d4d38ed614bf2eef1a4e66fec2f2c92
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Fri Sep 13 07:28:55 2019 +0200
cf5375
cf5375
    [tests] fix ip generation boundaries
cf5375
    
cf5375
    https://ci.kronosnet.org/job/knet-build-all-voting/1450/knet-build-all-voting=rhel80z-s390x/console
cf5375
    
cf5375
    and similar, when pid = 255, the secondary IP would hit 256 that is of course invalid.
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libnozzle/tests/test-common.c b/libnozzle/tests/test-common.c
cf5375
index b36be79..3afd2ec 100644
cf5375
--- a/libnozzle/tests/test-common.c
cf5375
+++ b/libnozzle/tests/test-common.c
cf5375
@@ -124,7 +124,7 @@ void make_local_ips(char *testipv4_1, char *testipv4_2, char *testipv6_1, char *
cf5375
 	pid = (uint8_t *)&mypid;
cf5375
 
cf5375
 	for (i = 0; i < sizeof(pid_t); i++) {
cf5375
-		if (pid[i] == 0) {
cf5375
+		if ((pid[i] == 0) || (pid[i] == 255)) {
cf5375
 			pid[i] = 128;
cf5375
 		}
cf5375
 	}
cf5375
commit 63567e1e6b6ebb91fe1df43b910d6b9bd78d528f
cf5375
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
Date:   Tue Oct 15 11:53:56 2019 +0200
cf5375
cf5375
    [PMTUd] invalidate MTU for a link if the value is lower than minimum
cf5375
    
cf5375
    Under heavy network load and packet loss, calculated MTU can be
cf5375
    too small. In that case we need to invalidate the link mtu,
cf5375
    that would remove the link from the rotation (and traffic) and
cf5375
    would give PMTUd time to get the right MTU in the next round.
cf5375
    
cf5375
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
cf5375
cf5375
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
cf5375
index d10984f..ab00b47 100644
cf5375
--- a/libknet/threads_pmtud.c
cf5375
+++ b/libknet/threads_pmtud.c
cf5375
@@ -460,7 +460,14 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
cf5375
 		}
cf5375
 		dst_link->has_valid_mtu = 0;
cf5375
 	} else {
cf5375
-		dst_link->has_valid_mtu = 1;
cf5375
+		if (dst_link->status.mtu < calc_min_mtu(knet_h)) {
cf5375
+			log_info(knet_h, KNET_SUB_PMTUD,
cf5375
+				 "Invalid MTU detected for host: %u link: %u mtu: %u",
cf5375
+				 dst_host->host_id, dst_link->link_id, dst_link->status.mtu);
cf5375
+			dst_link->has_valid_mtu = 0;
cf5375
+		} else {
cf5375
+			dst_link->has_valid_mtu = 1;
cf5375
+		}
cf5375
 		if (dst_link->has_valid_mtu) {
cf5375
 			if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) {
cf5375
 				log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u",