Blame SOURCES/bz1763715-fix-mtu-calculation.patch

0c6670
commit b67c63101246b400c7512cb1adbc590ac06cb6ee
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Tue Jul 30 11:18:33 2019 +0200
0c6670
0c6670
    [crypto] fix log information
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/crypto.c b/libknet/crypto.c
0c6670
index 9f05fba..9d6757b 100644
0c6670
--- a/libknet/crypto.c
0c6670
+++ b/libknet/crypto.c
0c6670
@@ -151,8 +151,6 @@ int crypto_init(
0c6670
 		goto out;
0c6670
 	}
0c6670
 
0c6670
-	log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
0c6670
-
0c6670
 out:
0c6670
 	if (!err) {
0c6670
 		knet_h->crypto_instance = new;
0c6670
@@ -161,6 +159,8 @@ out:
0c6670
 		knet_h->sec_hash_size = new->sec_hash_size;
0c6670
 		knet_h->sec_salt_size = new->sec_salt_size;
0c6670
 
0c6670
+		log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
0c6670
+
0c6670
 		if (current) {
0c6670
 			if (crypto_modules_cmds[current->model].ops->fini != NULL) {
0c6670
 				crypto_modules_cmds[current->model].ops->fini(knet_h, current);
0c6670
commit a89c2cd6d3863abe0f3ae0165239177a7461ee5e
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Wed Jul 31 14:15:07 2019 +0200
0c6670
0c6670
    [udp] log information about detected kernel MTU
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c
0c6670
index 53d2ba0..be990bb 100644
0c6670
--- a/libknet/transport_udp.c
0c6670
+++ b/libknet/transport_udp.c
0c6670
@@ -337,6 +337,7 @@ static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
0c6670
 									break;
0c6670
 								} else {
0c6670
 									knet_h->kernel_mtu = sock_err->ee_info;
0c6670
+									log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu);
0c6670
 									pthread_mutex_unlock(&knet_h->kmtu_mutex);
0c6670
 								}
0c6670
 
0c6670
commit 650ef6d26e83dd7827b2e913c52a1fac67ea60d4
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Fri Aug 2 10:43:09 2019 +0200
0c6670
0c6670
    [docs] add knet packet layout
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index 603f595..2cd48f9 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -91,6 +91,28 @@ restart:
0c6670
 		failsafe++;
0c6670
 	}
0c6670
 
0c6670
+	/*
0c6670
+	 * unencrypted packet looks like:
0c6670
+	 *
0c6670
+	 * | ip | protocol | knet_header | unencrypted data                                  |
0c6670
+	 * | onwire_len                                                                      |
0c6670
+	 * | overhead_len  |
0c6670
+	 *                 | data_len                                                        |
0c6670
+	 *                               | app MTU                                           |
0c6670
+	 *
0c6670
+	 * encrypted packet looks like (not to scale):
0c6670
+	 *
0c6670
+	 * | ip | protocol | salt | crypto(knet_header | data)      | crypto_data_pad | hash |
0c6670
+	 * | onwire_len                                                                      |
0c6670
+	 * | overhead_len  |
0c6670
+	 *                 | data_len                                                        |
0c6670
+	 *                                             | app MTU    |
0c6670
+	 *
0c6670
+	 * knet_h->sec_block_size is >= 0 if encryption will pad the data
0c6670
+	 * knet_h->sec_salt_size is >= 0 if encryption is enabled
0c6670
+	 * knet_h->sec_hash_size is >= 0 if signing is enabled
0c6670
+	 */
0c6670
+
0c6670
 	data_len = onwire_len - overhead_len;
0c6670
 
0c6670
 	if (knet_h->crypto_instance) {
0c6670
commit dbed772f0cb9070826eac6524646bd2ea7cce8c0
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Fri Aug 2 10:44:23 2019 +0200
0c6670
0c6670
    [PMTUd] fix MTU calculation when using crypto and add docs
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index 2cd48f9..1a19806 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -113,29 +113,68 @@ restart:
0c6670
 	 * knet_h->sec_hash_size is >= 0 if signing is enabled
0c6670
 	 */
0c6670
 
0c6670
+	/*
0c6670
+	 * common to all packets
0c6670
+	 */
0c6670
 	data_len = onwire_len - overhead_len;
0c6670
 
0c6670
 	if (knet_h->crypto_instance) {
0c6670
 
0c6670
+realign:
0c6670
 		if (knet_h->sec_block_size) {
0c6670
+
0c6670
+			/*
0c6670
+			 * drop both salt and hash, that leaves only the crypto data and padding
0c6670
+			 * we need to calculate the padding based on the real encrypted data.
0c6670
+			 */
0c6670
+			data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
+
0c6670
+			/*
0c6670
+			 * if the crypto mechanism requires padding, calculate the padding
0c6670
+			 * and add it back to data_len because that's what the crypto layer
0c6670
+			 * would do.
0c6670
+			 */
0c6670
 			pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size);
0c6670
+
0c6670
+			/*
0c6670
+			 * if are at the boundary, reset padding
0c6670
+			 */
0c6670
 			if (pad_len == knet_h->sec_block_size) {
0c6670
 				pad_len = 0;
0c6670
 			}
0c6670
 			data_len = data_len + pad_len;
0c6670
-		}
0c6670
 
0c6670
-		data_len = data_len + (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size);
0c6670
-
0c6670
-		if (knet_h->sec_block_size) {
0c6670
+			/*
0c6670
+			 * if our current data_len is higher than max_mtu_len
0c6670
+			 * then we need to reduce by padding size (that is our
0c6670
+			 * increment / decrement value)
0c6670
+			 *
0c6670
+			 * this generally happens only on the first PMTUd run
0c6670
+			 */
0c6670
 			while (data_len + overhead_len >= max_mtu_len) {
0c6670
 				data_len = data_len - knet_h->sec_block_size;
0c6670
 			}
0c6670
+
0c6670
+			/*
0c6670
+			 * add both hash and salt size back, similar to padding above,
0c6670
+			 * the crypto layer will add them to the data_len
0c6670
+			 */
0c6670
+			data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
 		}
0c6670
 
0c6670
 		if (dst_link->last_bad_mtu) {
0c6670
-			while (data_len + overhead_len >= dst_link->last_bad_mtu) {
0c6670
-				data_len = data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size);
0c6670
+			if (data_len + overhead_len >= dst_link->last_bad_mtu) {
0c6670
+				/*
0c6670
+				 * reduce data_len to something lower than last_bad_mtu, overhead_len
0c6670
+				 * and sec_block_size (decrementing step) - 1 (granularity)
0c6670
+				 */
0c6670
+				data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1;
0c6670
+				if (knet_h->sec_block_size) {
0c6670
+					/*
0c6670
+					 * make sure that data_len is aligned to the sec_block_size boundary
0c6670
+					 */
0c6670
+					goto realign;
0c6670
+				}
0c6670
 			}
0c6670
 		}
0c6670
 
0c6670
@@ -144,6 +183,10 @@ restart:
0c6670
 			return -1;
0c6670
 		}
0c6670
 
0c6670
+		/*
0c6670
+		 * recalculate onwire_len based on crypto information
0c6670
+		 * and place it in the PMTUd packet info
0c6670
+		 */
0c6670
 		onwire_len = data_len + overhead_len;
0c6670
 		knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
0c6670
 
0c6670
commit a9460c72fafe452b7cb584598aa43a87b44428f0
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Mon Aug 12 16:52:59 2019 +0200
0c6670
0c6670
    [PMTUd] rework the whole math to calculate MTU
0c6670
    
0c6670
    internal changes:
0c6670
    - drop the concept of sec_header_size that was completely wrong
0c6670
      and unnecessary
0c6670
    - bump crypto API to version 3 due to the above change
0c6670
    - clarify the difference between link->proto_overhead and
0c6670
      link->status->proto_overhead. We cannot rename the status
0c6670
      one as it would also change ABI.
0c6670
    - add onwire.c with documentation on the packet format
0c6670
      and what various len(s) mean in context.
0c6670
    - add 3 new functions to calculate MTUs back and forth
0c6670
      and use them around, hopefully with enough clarification
0c6670
      on why things are done in a given way.
0c6670
    - heavily change thread_pmtud.c to use those new facilities.
0c6670
    - fix major calculation issues when using crypto (non-crypto
0c6670
      was not affected by the problem).
0c6670
    - fix checks around to make sure they match the new math.
0c6670
    - fix padding calculation.
0c6670
    - add functional PMTUd crypto test
0c6670
      this test can take several hours (12+) and should be executed
0c6670
      on a controlled environment since it automatically changes
0c6670
      loopback MTU to run tests.
0c6670
    - fix way the lowest MTU is calculated during a PMTUd run
0c6670
      to avoid spurious double notifications.
0c6670
    - drop redundant checks.
0c6670
    
0c6670
    user visible changes:
0c6670
    - Global MTU is now calculated properly when using crypto
0c6670
      and values will be in general bigger than before due
0c6670
      to incorrect padding calculation in the previous implementation.
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/Makefile.am b/libknet/Makefile.am
0c6670
index d080732..2fa2416 100644
0c6670
--- a/libknet/Makefile.am
0c6670
+++ b/libknet/Makefile.am
0c6670
@@ -36,6 +36,7 @@ sources			= \
0c6670
 			  links_acl_loopback.c \
0c6670
 			  logging.c \
0c6670
 			  netutils.c \
0c6670
+			  onwire.c \
0c6670
 			  threads_common.c \
0c6670
 			  threads_dsthandler.c \
0c6670
 			  threads_heartbeat.c \
0c6670
diff --git a/libknet/crypto.c b/libknet/crypto.c
0c6670
index 9d6757b..afa4f88 100644
0c6670
--- a/libknet/crypto.c
0c6670
+++ b/libknet/crypto.c
0c6670
@@ -154,12 +154,14 @@ int crypto_init(
0c6670
 out:
0c6670
 	if (!err) {
0c6670
 		knet_h->crypto_instance = new;
0c6670
-		knet_h->sec_header_size = new->sec_header_size;
0c6670
 		knet_h->sec_block_size = new->sec_block_size;
0c6670
 		knet_h->sec_hash_size = new->sec_hash_size;
0c6670
 		knet_h->sec_salt_size = new->sec_salt_size;
0c6670
 
0c6670
-		log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
0c6670
+		log_debug(knet_h, KNET_SUB_CRYPTO, "Hash size: %zu salt size: %zu block size: %zu",
0c6670
+			  knet_h->sec_hash_size,
0c6670
+			  knet_h->sec_salt_size,
0c6670
+			  knet_h->sec_block_size);
0c6670
 
0c6670
 		if (current) {
0c6670
 			if (crypto_modules_cmds[current->model].ops->fini != NULL) {
0c6670
@@ -195,7 +197,6 @@ void crypto_fini(
0c6670
 			crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini(knet_h, knet_h->crypto_instance);
0c6670
 		}
0c6670
 		free(knet_h->crypto_instance);
0c6670
-		knet_h->sec_header_size = 0;
0c6670
 		knet_h->sec_block_size = 0;
0c6670
 		knet_h->sec_hash_size = 0;
0c6670
 		knet_h->sec_salt_size = 0;
0c6670
diff --git a/libknet/crypto_model.h b/libknet/crypto_model.h
0c6670
index 70f6238..b05e49c 100644
0c6670
--- a/libknet/crypto_model.h
0c6670
+++ b/libknet/crypto_model.h
0c6670
@@ -14,13 +14,12 @@
0c6670
 struct crypto_instance {
0c6670
 	int	model;
0c6670
 	void	*model_instance;
0c6670
-	size_t	sec_header_size;
0c6670
 	size_t	sec_block_size;
0c6670
 	size_t	sec_hash_size;
0c6670
 	size_t	sec_salt_size;
0c6670
 };
0c6670
 
0c6670
-#define KNET_CRYPTO_MODEL_ABI 2
0c6670
+#define KNET_CRYPTO_MODEL_ABI 3
0c6670
 
0c6670
 /*
0c6670
  * see compress_model.h for explanation of the various lib related functions
0c6670
diff --git a/libknet/crypto_nss.c b/libknet/crypto_nss.c
0c6670
index 330b40c..c624a47 100644
0c6670
--- a/libknet/crypto_nss.c
0c6670
+++ b/libknet/crypto_nss.c
0c6670
@@ -801,10 +801,7 @@ static int nsscrypto_init(
0c6670
 		goto out_err;
0c6670
 	}
0c6670
 
0c6670
-	crypto_instance->sec_header_size = 0;
0c6670
-
0c6670
 	if (nsscrypto_instance->crypto_hash_type > 0) {
0c6670
-		crypto_instance->sec_header_size += nsshash_len[nsscrypto_instance->crypto_hash_type];
0c6670
 		crypto_instance->sec_hash_size = nsshash_len[nsscrypto_instance->crypto_hash_type];
0c6670
 	}
0c6670
 
0c6670
@@ -821,8 +818,6 @@ static int nsscrypto_init(
0c6670
 			}
0c6670
 		}
0c6670
 
0c6670
-		crypto_instance->sec_header_size += (block_size * 2);
0c6670
-		crypto_instance->sec_header_size += SALT_SIZE;
0c6670
 		crypto_instance->sec_salt_size = SALT_SIZE;
0c6670
 		crypto_instance->sec_block_size = block_size;
0c6670
 	}
0c6670
diff --git a/libknet/crypto_openssl.c b/libknet/crypto_openssl.c
0c6670
index 0cbc6f5..6571498 100644
0c6670
--- a/libknet/crypto_openssl.c
0c6670
+++ b/libknet/crypto_openssl.c
0c6670
@@ -566,11 +566,8 @@ static int opensslcrypto_init(
0c6670
 	memmove(opensslcrypto_instance->private_key, knet_handle_crypto_cfg->private_key, knet_handle_crypto_cfg->private_key_len);
0c6670
 	opensslcrypto_instance->private_key_len = knet_handle_crypto_cfg->private_key_len;
0c6670
 
0c6670
-	crypto_instance->sec_header_size = 0;
0c6670
-
0c6670
 	if (opensslcrypto_instance->crypto_hash_type) {
0c6670
 		crypto_instance->sec_hash_size = EVP_MD_size(opensslcrypto_instance->crypto_hash_type);
0c6670
-		crypto_instance->sec_header_size += crypto_instance->sec_hash_size;
0c6670
 	}
0c6670
 
0c6670
 	if (opensslcrypto_instance->crypto_cipher_type) {
0c6670
@@ -578,8 +575,6 @@ static int opensslcrypto_init(
0c6670
 
0c6670
 		block_size = EVP_CIPHER_block_size(opensslcrypto_instance->crypto_cipher_type);
0c6670
 
0c6670
-		crypto_instance->sec_header_size += (block_size * 2);
0c6670
-		crypto_instance->sec_header_size += SALT_SIZE;
0c6670
 		crypto_instance->sec_salt_size = SALT_SIZE;
0c6670
 		crypto_instance->sec_block_size = block_size;
0c6670
 	}
0c6670
diff --git a/libknet/internals.h b/libknet/internals.h
0c6670
index 3f105a1..31840e4 100644
0c6670
--- a/libknet/internals.h
0c6670
+++ b/libknet/internals.h
0c6670
@@ -71,7 +71,9 @@ struct knet_link {
0c6670
 	uint8_t received_pong;
0c6670
 	struct timespec ping_last;
0c6670
 	/* used by PMTUD thread as temp per-link variables and should always contain the onwire_len value! */
0c6670
-	uint32_t proto_overhead;
0c6670
+	uint32_t proto_overhead;		/* IP + UDP/SCTP overhead. NOT to be confused
0c6670
+						   with stats.proto_overhead that includes also knet headers
0c6670
+						   and crypto headers */
0c6670
 	struct timespec pmtud_last;
0c6670
 	uint32_t last_ping_size;
0c6670
 	uint32_t last_good_mtu;
0c6670
@@ -197,7 +199,6 @@ struct knet_handle {
0c6670
 	int pmtud_forcerun;
0c6670
 	int pmtud_abort;
0c6670
 	struct crypto_instance *crypto_instance;
0c6670
-	size_t sec_header_size;
0c6670
 	size_t sec_block_size;
0c6670
 	size_t sec_hash_size;
0c6670
 	size_t sec_salt_size;
0c6670
diff --git a/libknet/links.c b/libknet/links.c
0c6670
index 51ead5a..03e0af9 100644
0c6670
--- a/libknet/links.c
0c6670
+++ b/libknet/links.c
0c6670
@@ -265,7 +265,32 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
0c6670
 		host->status.reachable = 1;
0c6670
 		link->status.mtu = KNET_PMTUD_SIZE_V6;
0c6670
 	} else {
0c6670
-		link->status.mtu =  KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
0c6670
+		/*
0c6670
+		 * calculate the minimum MTU that is safe to use,
0c6670
+		 * based on RFCs and that each network device should
0c6670
+		 * be able to support without any troubles
0c6670
+		 */
0c6670
+		if (link->dynamic == KNET_LINK_STATIC) {
0c6670
+			/*
0c6670
+			 * with static link we can be more precise than using
0c6670
+			 * the generic calc_min_mtu()
0c6670
+			 */
0c6670
+			switch (link->dst_addr.ss_family) {
0c6670
+				case AF_INET6:
0c6670
+					link->status.mtu =  calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead));
0c6670
+					break;
0c6670
+				case AF_INET:
0c6670
+					link->status.mtu =  calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead));
0c6670
+					break;
0c6670
+			}
0c6670
+		} else {
0c6670
+			/*
0c6670
+			 * for dynamic links we start with the minimum MTU
0c6670
+			 * possible and PMTUd will kick in immediately
0c6670
+			 * after connection status is 1
0c6670
+			 */
0c6670
+			link->status.mtu =  calc_min_mtu(knet_h);
0c6670
+		}
0c6670
 		link->has_valid_mtu = 1;
0c6670
 	}
0c6670
 
0c6670
diff --git a/libknet/onwire.c b/libknet/onwire.c
0c6670
new file mode 100644
0c6670
index 0000000..143ac4b
0c6670
--- /dev/null
0c6670
+++ b/libknet/onwire.c
0c6670
@@ -0,0 +1,127 @@
0c6670
+/*
0c6670
+ * Copyright (C) 2019 Red Hat, Inc.  All rights reserved.
0c6670
+ *
0c6670
+ * Author: Fabio M. Di Nitto <fabbione@kronosnet.org>
0c6670
+ *
0c6670
+ * This software licensed under LGPL-2.0+
0c6670
+ */
0c6670
+
0c6670
+#include "config.h"
0c6670
+
0c6670
+#include <sys/errno.h>
0c6670
+#include <stdlib.h>
0c6670
+#include <string.h>
0c6670
+
0c6670
+#include "crypto.h"
0c6670
+#include "internals.h"
0c6670
+#include "logging.h"
0c6670
+#include "common.h"
0c6670
+#include "transport_udp.h"
0c6670
+#include "transport_sctp.h"
0c6670
+
0c6670
+/*
0c6670
+ * unencrypted packet looks like:
0c6670
+ *
0c6670
+ * | ip | protocol  | knet_header | unencrypted data                                  |
0c6670
+ * | onwire_len                                                                       |
0c6670
+ * | proto_overhead |
0c6670
+ *                  | data_len                                                        |
0c6670
+ *                                | app MTU                                           |
0c6670
+ *
0c6670
+ * encrypted packet looks like (not to scale):
0c6670
+ *
0c6670
+ * | ip | protocol  | salt | crypto(knet_header | data)      | crypto_data_pad | hash |
0c6670
+ * | onwire_len                                                                       |
0c6670
+ * | proto_overhead |
0c6670
+ *                  | data_len                                                        |
0c6670
+ *                                              | app MTU    |
0c6670
+ *
0c6670
+ * knet_h->sec_block_size is > 0 if encryption will pad the data
0c6670
+ * knet_h->sec_salt_size is > 0 if encryption is enabled
0c6670
+ * knet_h->sec_hash_size is > 0 if signing is enabled
0c6670
+ */
0c6670
+
0c6670
+/*
0c6670
+ * this function takes in the data that we would like to send
0c6670
+ * and tells us the outgoing onwire data size with crypto and
0c6670
+ * all the headers adjustment.
0c6670
+ * calling thread needs to account for protocol overhead.
0c6670
+ */
0c6670
+
0c6670
+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen)
0c6670
+{
0c6670
+	size_t outlen = inlen, pad_len = 0;
0c6670
+
0c6670
+	if (knet_h->sec_block_size) {
0c6670
+		/*
0c6670
+		 * if the crypto mechanism requires padding, calculate the padding
0c6670
+		 * and add it back to outlen because that's what the crypto layer
0c6670
+		 * would do.
0c6670
+		 */
0c6670
+		pad_len = knet_h->sec_block_size - (outlen % knet_h->sec_block_size);
0c6670
+
0c6670
+		outlen = outlen + pad_len;
0c6670
+	}
0c6670
+
0c6670
+	return outlen + knet_h->sec_salt_size + knet_h->sec_hash_size;
0c6670
+}
0c6670
+
0c6670
+/*
0c6670
+ * this function takes in the data that we would like to send
0c6670
+ * and tells us what is the real maximum data we can send
0c6670
+ * accounting for headers and crypto
0c6670
+ * calling thread needs to account for protocol overhead.
0c6670
+ */
0c6670
+
0c6670
+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen)
0c6670
+{
0c6670
+	size_t outlen = inlen, pad_len = 0;
0c6670
+
0c6670
+	if (knet_h->sec_block_size) {
0c6670
+		/*
0c6670
+		 * drop both salt and hash, that leaves only the crypto data and padding
0c6670
+		 * we need to calculate the padding based on the real encrypted data
0c6670
+		 * that includes the knet_header.
0c6670
+		 */
0c6670
+		outlen = outlen - (knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
+
0c6670
+		/*
0c6670
+		 * if the crypto mechanism requires padding, calculate the padding
0c6670
+		 * and remove it, to align the data.
0c6670
+		 * NOTE: we need to remove pad_len + 1 because, based on testing,
0c6670
+		 * if we send data that are already aligned to block_size, the
0c6670
+		 * crypto implementations will add another block_size!
0c6670
+		 * so we want to make sure that our data won't add an unnecessary
0c6670
+		 * block_size that we need to remove later.
0c6670
+		 */
0c6670
+		pad_len = outlen % knet_h->sec_block_size;
0c6670
+
0c6670
+		outlen = outlen - (pad_len + 1);
0c6670
+
0c6670
+		/*
0c6670
+		 * add both hash and salt size back, similar to padding above,
0c6670
+		 * the crypto layer will add them to the outlen
0c6670
+		 */
0c6670
+		outlen = outlen + (knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
+	}
0c6670
+
0c6670
+	/*
0c6670
+	 * drop KNET_HEADER_ALL_SIZE to provide a clean application MTU
0c6670
+	 * and various crypto headers
0c6670
+	 */
0c6670
+	outlen = outlen - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
+
0c6670
+	return outlen;
0c6670
+}
0c6670
+
0c6670
+/*
0c6670
+ * set the lowest possible value as failsafe for all links.
0c6670
+ * KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6
0c6670
+ * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4
0c6670
+ * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD
0c6670
+ */
0c6670
+
0c6670
+size_t calc_min_mtu(knet_handle_t knet_h)
0c6670
+{
0c6670
+	return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD));
0c6670
+}
0c6670
diff --git a/libknet/onwire.h b/libknet/onwire.h
0c6670
index 9815bc3..74d4d09 100644
0c6670
--- a/libknet/onwire.h
0c6670
+++ b/libknet/onwire.h
0c6670
@@ -120,7 +120,9 @@ struct knet_header_payload_ping {
0c6670
 #define KNET_PMTUD_SIZE_V4 65535
0c6670
 #define KNET_PMTUD_SIZE_V6 KNET_PMTUD_SIZE_V4
0c6670
 
0c6670
-/* These two get the protocol-specific overheads added to them */
0c6670
+/*
0c6670
+ * IPv4/IPv6 header size
0c6670
+ */
0c6670
 #define KNET_PMTUD_OVERHEAD_V4 20
0c6670
 #define KNET_PMTUD_OVERHEAD_V6 40
0c6670
 
0c6670
@@ -199,4 +201,8 @@ struct knet_header {
0c6670
 #define KNET_HEADER_PMTUD_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud))
0c6670
 #define KNET_HEADER_DATA_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data))
0c6670
 
0c6670
+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen);
0c6670
+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen);
0c6670
+size_t calc_min_mtu(knet_handle_t knet_h);
0c6670
+
0c6670
 #endif
0c6670
diff --git a/libknet/tests/Makefile.am b/libknet/tests/Makefile.am
0c6670
index 3346596..9160780 100644
0c6670
--- a/libknet/tests/Makefile.am
0c6670
+++ b/libknet/tests/Makefile.am
0c6670
@@ -38,6 +38,12 @@ int_checks		= \
0c6670
 
0c6670
 fun_checks		=
0c6670
 
0c6670
+# checks below need to be executed manually
0c6670
+# or with a specific environment
0c6670
+
0c6670
+long_run_checks		= \
0c6670
+			  fun_pmtud_crypto_test
0c6670
+
0c6670
 benchmarks		= \
0c6670
 			  knet_bench_test
0c6670
 
0c6670
@@ -45,6 +51,7 @@ noinst_PROGRAMS		= \
0c6670
 			  api_knet_handle_new_limit_test \
0c6670
 			  pckt_test \
0c6670
 			  $(benchmarks) \
0c6670
+			  $(long_run_checks) \
0c6670
 			  $(check_PROGRAMS)
0c6670
 
0c6670
 noinst_SCRIPTS		= \
0c6670
@@ -71,6 +78,7 @@ int_links_acl_ip_test_SOURCES = int_links_acl_ip.c \
0c6670
 				../logging.c \
0c6670
 				../netutils.c \
0c6670
 				../threads_common.c \
0c6670
+				../onwire.c \
0c6670
 				../transports.c \
0c6670
 				../transport_common.c \
0c6670
 				../transport_loopback.c \
0c6670
@@ -88,4 +96,9 @@ knet_bench_test_SOURCES	= knet_bench.c \
0c6670
 			  ../logging.c \
0c6670
 			  ../compat.c \
0c6670
 			  ../transport_common.c \
0c6670
-			  ../threads_common.c
0c6670
+			  ../threads_common.c \
0c6670
+			  ../onwire.c
0c6670
+
0c6670
+fun_pmtud_crypto_test_SOURCES = fun_pmtud_crypto.c \
0c6670
+				test-common.c \
0c6670
+				../onwire.c
0c6670
diff --git a/libknet/tests/api_knet_send_crypto.c b/libknet/tests/api_knet_send_crypto.c
0c6670
index 11de857..5fc5463 100644
0c6670
--- a/libknet/tests/api_knet_send_crypto.c
0c6670
+++ b/libknet/tests/api_knet_send_crypto.c
0c6670
@@ -67,7 +67,7 @@ static void test(const char *model)
0c6670
 	memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg));
0c6670
 	strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1);
0c6670
 	strncpy(knet_handle_crypto_cfg.crypto_cipher_type, "aes128", sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1);
0c6670
-	strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha1", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
0c6670
+	strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha256", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
0c6670
 	knet_handle_crypto_cfg.private_key_len = 2000;
0c6670
 
0c6670
 	if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) {
0c6670
diff --git a/libknet/tests/fun_pmtud_crypto.c b/libknet/tests/fun_pmtud_crypto.c
0c6670
new file mode 100644
0c6670
index 0000000..91c062c
0c6670
--- /dev/null
0c6670
+++ b/libknet/tests/fun_pmtud_crypto.c
0c6670
@@ -0,0 +1,326 @@
0c6670
+/*
0c6670
+ * Copyright (C) 2019 Red Hat, Inc.  All rights reserved.
0c6670
+ *
0c6670
+ * Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
0c6670
+ *
0c6670
+ * This software licensed under GPL-2.0+
0c6670
+ */
0c6670
+
0c6670
+#include "config.h"
0c6670
+
0c6670
+#include <errno.h>
0c6670
+#include <stdio.h>
0c6670
+#include <stdlib.h>
0c6670
+#include <string.h>
0c6670
+#include <unistd.h>
0c6670
+#include <inttypes.h>
0c6670
+#include <sys/ioctl.h>
0c6670
+#include <net/ethernet.h>
0c6670
+#include <ifaddrs.h>
0c6670
+#include <net/if.h>
0c6670
+
0c6670
+#include "libknet.h"
0c6670
+
0c6670
+#include "compress.h"
0c6670
+#include "internals.h"
0c6670
+#include "netutils.h"
0c6670
+#include "onwire.h"
0c6670
+#include "test-common.h"
0c6670
+
0c6670
+static int private_data;
0c6670
+
0c6670
+static void sock_notify(void *pvt_data,
0c6670
+			int datafd,
0c6670
+			int8_t channel,
0c6670
+			uint8_t tx_rx,
0c6670
+			int error,
0c6670
+			int errorno)
0c6670
+{
0c6670
+	return;
0c6670
+}
0c6670
+
0c6670
+static int iface_fd = 0;
0c6670
+static int default_mtu = 0;
0c6670
+
0c6670
+#ifdef KNET_LINUX
0c6670
+const char *loopback = "lo";
0c6670
+#endif
0c6670
+#ifdef KNET_BSD
0c6670
+const char *loopback = "lo0";
0c6670
+#endif
0c6670
+
0c6670
+static int fd_init(void)
0c6670
+{
0c6670
+#ifdef KNET_LINUX
0c6670
+	return socket(AF_INET, SOCK_STREAM, 0);
0c6670
+#endif
0c6670
+#ifdef KNET_BSD
0c6670
+	return socket(AF_LOCAL, SOCK_DGRAM, 0);
0c6670
+#endif
0c6670
+	return -1;
0c6670
+}
0c6670
+
0c6670
+static int set_iface_mtu(uint32_t mtu)
0c6670
+{
0c6670
+	int err = 0;
0c6670
+	struct ifreq ifr;
0c6670
+
0c6670
+	memset(&ifr, 0, sizeof(struct ifreq));
0c6670
+	strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1);
0c6670
+	ifr.ifr_mtu = mtu;
0c6670
+
0c6670
+	err = ioctl(iface_fd, SIOCSIFMTU, &ifr);
0c6670
+
0c6670
+	return err;
0c6670
+}
0c6670
+
0c6670
+static int get_iface_mtu(void)
0c6670
+{
0c6670
+	int err = 0, savederrno = 0;
0c6670
+	struct ifreq ifr;
0c6670
+
0c6670
+	memset(&ifr, 0, sizeof(struct ifreq));
0c6670
+	strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1);
0c6670
+
0c6670
+	err = ioctl(iface_fd, SIOCGIFMTU, &ifr);
0c6670
+	if (err) {
0c6670
+		savederrno = errno;
0c6670
+		goto out_clean;
0c6670
+	}
0c6670
+
0c6670
+	err = ifr.ifr_mtu;
0c6670
+
0c6670
+out_clean:
0c6670
+	errno = savederrno;
0c6670
+	return err;
0c6670
+}
0c6670
+
0c6670
+static int exit_local(int code)
0c6670
+{
0c6670
+	set_iface_mtu(default_mtu);
0c6670
+	close(iface_fd);
0c6670
+	iface_fd = 0;
0c6670
+	exit(code);
0c6670
+}
0c6670
+
0c6670
+static void test_mtu(const char *model, const char *crypto, const char *hash)
0c6670
+{
0c6670
+	knet_handle_t knet_h;
0c6670
+	int logfds[2];
0c6670
+	int datafd = 0;
0c6670
+	int8_t channel = 0;
0c6670
+	struct sockaddr_storage lo;
0c6670
+	struct knet_handle_crypto_cfg knet_handle_crypto_cfg;
0c6670
+	unsigned int data_mtu, expected_mtu;
0c6670
+	size_t calculated_iface_mtu = 0, detected_iface_mtu = 0;
0c6670
+
0c6670
+	if (make_local_sockaddr(&lo, 0) < 0) {
0c6670
+		printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno));
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	setup_logpipes(logfds);
0c6670
+
0c6670
+	knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG);
0c6670
+
0c6670
+	flush_logs(logfds[0], stdout);
0c6670
+
0c6670
+	printf("Test knet_send with %s and valid data\n", model);
0c6670
+
0c6670
+	memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg));
0c6670
+	strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1);
0c6670
+	strncpy(knet_handle_crypto_cfg.crypto_cipher_type, crypto, sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1);
0c6670
+	strncpy(knet_handle_crypto_cfg.crypto_hash_type, hash, sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
0c6670
+	knet_handle_crypto_cfg.private_key_len = 2000;
0c6670
+
0c6670
+	if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) {
0c6670
+		printf("knet_handle_crypto failed with correct config: %s\n", strerror(errno));
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+        }
0c6670
+
0c6670
+	if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) {
0c6670
+		printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno));
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+        }
0c6670
+
0c6670
+	datafd = 0;
0c6670
+	channel = -1;
0c6670
+
0c6670
+	if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) {
0c6670
+		printf("knet_handle_add_datafd failed: %s\n", strerror(errno));
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	if (knet_host_add(knet_h, 1) < 0) {
0c6670
+		printf("knet_host_add failed: %s\n", strerror(errno));
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	if (knet_link_set_config(knet_h, 1, 0, KNET_TRANSPORT_UDP, &lo, &lo, 0) < 0) {
0c6670
+		printf("Unable to configure link: %s\n", strerror(errno));
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	if (knet_link_set_pong_count(knet_h, 1, 0, 1) < 0) {
0c6670
+		printf("knet_link_set_pong_count failed: %s\n", strerror(errno));
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	if (knet_link_set_enable(knet_h, 1, 0, 1) < 0) {
0c6670
+		printf("knet_link_set_enable failed: %s\n", strerror(errno));
0c6670
+		knet_link_clear_config(knet_h, 1, 0);
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	if (wait_for_host(knet_h, 1, 4, logfds[0], stdout) < 0) {
0c6670
+		printf("timeout waiting for host to be reachable");
0c6670
+		knet_link_set_enable(knet_h, 1, 0, 0);
0c6670
+		knet_link_clear_config(knet_h, 1, 0);
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	flush_logs(logfds[0], stdout);
0c6670
+
0c6670
+	if (knet_handle_pmtud_get(knet_h, &data_mtu) < 0) {
0c6670
+		printf("knet_handle_pmtud_get failed error: %s\n", strerror(errno));
0c6670
+		knet_link_set_enable(knet_h, 1, 0, 0);
0c6670
+		knet_link_clear_config(knet_h, 1, 0);
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	calculated_iface_mtu = calc_data_outlen(knet_h, data_mtu + KNET_HEADER_ALL_SIZE) + 28;
0c6670
+	detected_iface_mtu = get_iface_mtu();
0c6670
+	/*
0c6670
+	 * 28 = 20 IP header + 8 UDP header
0c6670
+	 */
0c6670
+	expected_mtu = calc_max_data_outlen(knet_h, detected_iface_mtu - 28);
0c6670
+
0c6670
+	if (expected_mtu != data_mtu) {
0c6670
+		printf("Wrong MTU detected! interface mtu: %zu knet mtu: %u expected mtu: %u\n", detected_iface_mtu, data_mtu, expected_mtu);
0c6670
+		knet_link_set_enable(knet_h, 1, 0, 0);
0c6670
+		knet_link_clear_config(knet_h, 1, 0);
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	if ((detected_iface_mtu - calculated_iface_mtu) >= knet_h->sec_block_size) {
0c6670
+		printf("Wrong MTU detected! real iface mtu: %zu calculated: %zu\n", detected_iface_mtu, calculated_iface_mtu);
0c6670
+		knet_link_set_enable(knet_h, 1, 0, 0);
0c6670
+		knet_link_clear_config(knet_h, 1, 0);
0c6670
+		knet_host_remove(knet_h, 1);
0c6670
+		knet_handle_free(knet_h);
0c6670
+		flush_logs(logfds[0], stdout);
0c6670
+		close_logpipes(logfds);
0c6670
+		exit_local(FAIL);
0c6670
+	}
0c6670
+
0c6670
+	knet_link_set_enable(knet_h, 1, 0, 0);
0c6670
+	knet_link_clear_config(knet_h, 1, 0);
0c6670
+	knet_host_remove(knet_h, 1);
0c6670
+	knet_handle_free(knet_h);
0c6670
+	flush_logs(logfds[0], stdout);
0c6670
+	close_logpipes(logfds);
0c6670
+}
0c6670
+
0c6670
+static void test(const char *model, const char *crypto, const char *hash)
0c6670
+{
0c6670
+	int i = 576;
0c6670
+	int max = 65535;
0c6670
+
0c6670
+	while (i <= max) {
0c6670
+		printf("Setting interface MTU to: %i\n", i);
0c6670
+		set_iface_mtu(i);
0c6670
+		test_mtu(model, crypto, hash);
0c6670
+		if (i == max) {
0c6670
+			break;
0c6670
+		}
0c6670
+		i = i + 15;
0c6670
+		if (i > max) {
0c6670
+			i = max;
0c6670
+		}
0c6670
+	}
0c6670
+}
0c6670
+
0c6670
+int main(int argc, char *argv[])
0c6670
+{
0c6670
+	struct knet_crypto_info crypto_list[16];
0c6670
+	size_t crypto_list_entries;
0c6670
+
0c6670
+#ifdef KNET_BSD
0c6670
+	if (is_memcheck() || is_helgrind()) {
0c6670
+		printf("valgrind-freebsd cannot run this test properly. Skipping\n");
0c6670
+		return SKIP;
0c6670
+	}
0c6670
+#endif
0c6670
+
0c6670
+	if (geteuid() != 0) {
0c6670
+		printf("This test requires root privileges\n");
0c6670
+		return SKIP;
0c6670
+	}
0c6670
+
0c6670
+	iface_fd = fd_init();
0c6670
+	if (iface_fd < 0) {
0c6670
+		printf("fd_init failed: %s\n", strerror(errno));
0c6670
+		return FAIL;
0c6670
+	}
0c6670
+
0c6670
+	default_mtu = get_iface_mtu();
0c6670
+	if (default_mtu < 0) {
0c6670
+		printf("get_iface_mtu failed: %s\n", strerror(errno));
0c6670
+		return FAIL;
0c6670
+	}
0c6670
+
0c6670
+	memset(crypto_list, 0, sizeof(crypto_list));
0c6670
+
0c6670
+	if (knet_get_crypto_list(crypto_list, &crypto_list_entries) < 0) {
0c6670
+		printf("knet_get_crypto_list failed: %s\n", strerror(errno));
0c6670
+		return FAIL;
0c6670
+	}
0c6670
+
0c6670
+	if (crypto_list_entries == 0) {
0c6670
+		printf("no crypto modules detected. Skipping\n");
0c6670
+		return SKIP;
0c6670
+	}
0c6670
+
0c6670
+	test(crypto_list[0].name, "aes128", "sha1");
0c6670
+	test(crypto_list[0].name, "aes128", "sha256");
0c6670
+	test(crypto_list[0].name, "aes256", "sha1");
0c6670
+	test(crypto_list[0].name, "aes256", "sha256");
0c6670
+
0c6670
+	exit_local(PASS);
0c6670
+}
0c6670
diff --git a/libknet/threads_common.c b/libknet/threads_common.c
0c6670
index 1f3e1e3..03edfc4 100644
0c6670
--- a/libknet/threads_common.c
0c6670
+++ b/libknet/threads_common.c
0c6670
@@ -161,7 +161,7 @@ void force_pmtud_run(knet_handle_t knet_h, uint8_t subsystem, uint8_t reset_mtu)
0c6670
 {
0c6670
 	if (reset_mtu) {
0c6670
 		log_debug(knet_h, subsystem, "PMTUd has been reset to default");
0c6670
-		knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
0c6670
+		knet_h->data_mtu = calc_min_mtu(knet_h);
0c6670
 		if (knet_h->pmtud_notify_fn) {
0c6670
 			knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data,
0c6670
 						knet_h->data_mtu);
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index 1a19806..1dd1788 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -25,16 +25,16 @@
0c6670
 static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link)
0c6670
 {
0c6670
 	int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once;
0c6670
-	uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */
0c6670
-	size_t onwire_len;   /* current packet onwire size */
0c6670
-	size_t overhead_len; /* onwire packet overhead (protocol based) */
0c6670
-	size_t max_mtu_len;  /* max mtu for protocol */
0c6670
-	size_t data_len;     /* how much data we can send in the packet
0c6670
-			      * generally would be onwire_len - overhead_len
0c6670
-			      * needs to be adjusted for crypto
0c6670
-			      */
0c6670
-	size_t pad_len;	     /* crypto packet pad size, needs to move into crypto.c callbacks */
0c6670
-	ssize_t len;	     /* len of what we were able to sendto onwire */
0c6670
+	uint32_t kernel_mtu;		/* record kernel_mtu from EMSGSIZE */
0c6670
+	size_t onwire_len;   		/* current packet onwire size */
0c6670
+	size_t ipproto_overhead_len;	/* onwire packet overhead (protocol based) */
0c6670
+	size_t max_mtu_len;		/* max mtu for protocol */
0c6670
+	size_t data_len;		/* how much data we can send in the packet
0c6670
+					 * generally would be onwire_len - ipproto_overhead_len
0c6670
+					 * needs to be adjusted for crypto
0c6670
+					 */
0c6670
+	size_t app_mtu_len;		/* real data that we can send onwire */
0c6670
+	ssize_t len;			/* len of what we were able to sendto onwire */
0c6670
 
0c6670
 	struct timespec ts;
0c6670
 	unsigned long long pong_timeout_adj_tmp;
0c6670
@@ -45,20 +45,16 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
0c6670
 	mutex_retry_limit = 0;
0c6670
 	failsafe = 0;
0c6670
 
0c6670
-	dst_link->last_bad_mtu = 0;
0c6670
-
0c6670
 	knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id;
0c6670
 
0c6670
 	switch (dst_link->dst_addr.ss_family) {
0c6670
 		case AF_INET6:
0c6670
 			max_mtu_len = KNET_PMTUD_SIZE_V6;
0c6670
-			overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead;
0c6670
-			dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len;
0c6670
+			ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead;
0c6670
 			break;
0c6670
 		case AF_INET:
0c6670
 			max_mtu_len = KNET_PMTUD_SIZE_V4;
0c6670
-			overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead;
0c6670
-			dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len;
0c6670
+			ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead;
0c6670
 			break;
0c6670
 		default:
0c6670
 			log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unknown protocol");
0c6670
@@ -66,6 +62,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
0c6670
 			break;
0c6670
 	}
0c6670
 
0c6670
+	dst_link->last_bad_mtu = 0;
0c6670
+	dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len;
0c6670
+
0c6670
 	/*
0c6670
 	 * discovery starts from the top because kernel will
0c6670
 	 * refuse to send packets > current iface mtu.
0c6670
@@ -92,107 +91,39 @@ restart:
0c6670
 	}
0c6670
 
0c6670
 	/*
0c6670
-	 * unencrypted packet looks like:
0c6670
-	 *
0c6670
-	 * | ip | protocol | knet_header | unencrypted data                                  |
0c6670
-	 * | onwire_len                                                                      |
0c6670
-	 * | overhead_len  |
0c6670
-	 *                 | data_len                                                        |
0c6670
-	 *                               | app MTU                                           |
0c6670
-	 *
0c6670
-	 * encrypted packet looks like (not to scale):
0c6670
-	 *
0c6670
-	 * | ip | protocol | salt | crypto(knet_header | data)      | crypto_data_pad | hash |
0c6670
-	 * | onwire_len                                                                      |
0c6670
-	 * | overhead_len  |
0c6670
-	 *                 | data_len                                                        |
0c6670
-	 *                                             | app MTU    |
0c6670
-	 *
0c6670
-	 * knet_h->sec_block_size is >= 0 if encryption will pad the data
0c6670
-	 * knet_h->sec_salt_size is >= 0 if encryption is enabled
0c6670
-	 * knet_h->sec_hash_size is >= 0 if signing is enabled
0c6670
+	 * common to all packets
0c6670
 	 */
0c6670
 
0c6670
 	/*
0c6670
-	 * common to all packets
0c6670
+	 * calculate the application MTU based on current onwire_len minus ipproto_overhead_len
0c6670
 	 */
0c6670
-	data_len = onwire_len - overhead_len;
0c6670
-
0c6670
-	if (knet_h->crypto_instance) {
0c6670
 
0c6670
-realign:
0c6670
-		if (knet_h->sec_block_size) {
0c6670
+	app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len);
0c6670
 
0c6670
-			/*
0c6670
-			 * drop both salt and hash, that leaves only the crypto data and padding
0c6670
-			 * we need to calculate the padding based on the real encrypted data.
0c6670
-			 */
0c6670
-			data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
-
0c6670
-			/*
0c6670
-			 * if the crypto mechanism requires padding, calculate the padding
0c6670
-			 * and add it back to data_len because that's what the crypto layer
0c6670
-			 * would do.
0c6670
-			 */
0c6670
-			pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size);
0c6670
-
0c6670
-			/*
0c6670
-			 * if are at the boundary, reset padding
0c6670
-			 */
0c6670
-			if (pad_len == knet_h->sec_block_size) {
0c6670
-				pad_len = 0;
0c6670
-			}
0c6670
-			data_len = data_len + pad_len;
0c6670
-
0c6670
-			/*
0c6670
-			 * if our current data_len is higher than max_mtu_len
0c6670
-			 * then we need to reduce by padding size (that is our
0c6670
-			 * increment / decrement value)
0c6670
-			 *
0c6670
-			 * this generally happens only on the first PMTUd run
0c6670
-			 */
0c6670
-			while (data_len + overhead_len >= max_mtu_len) {
0c6670
-				data_len = data_len - knet_h->sec_block_size;
0c6670
-			}
0c6670
+	/*
0c6670
+	 * recalculate onwire len back that might be different based
0c6670
+	 * on data padding from crypto layer.
0c6670
+	 */
0c6670
 
0c6670
-			/*
0c6670
-			 * add both hash and salt size back, similar to padding above,
0c6670
-			 * the crypto layer will add them to the data_len
0c6670
-			 */
0c6670
-			data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size);
0c6670
-		}
0c6670
+	onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len;
0c6670
 
0c6670
-		if (dst_link->last_bad_mtu) {
0c6670
-			if (data_len + overhead_len >= dst_link->last_bad_mtu) {
0c6670
-				/*
0c6670
-				 * reduce data_len to something lower than last_bad_mtu, overhead_len
0c6670
-				 * and sec_block_size (decrementing step) - 1 (granularity)
0c6670
-				 */
0c6670
-				data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1;
0c6670
-				if (knet_h->sec_block_size) {
0c6670
-					/*
0c6670
-					 * make sure that data_len is aligned to the sec_block_size boundary
0c6670
-					 */
0c6670
-					goto realign;
0c6670
-				}
0c6670
-			}
0c6670
-		}
0c6670
+	/*
0c6670
+	 * calculate the size of what we need to send to sendto(2).
0c6670
+	 * see also onwire.c for packet format explanation.
0c6670
+	 */
0c6670
+	data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE;
0c6670
 
0c6670
-		if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size) + 1) {
0c6670
+	if (knet_h->crypto_instance) {
0c6670
+		if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) {
0c6670
 			log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)");
0c6670
 			return -1;
0c6670
 		}
0c6670
 
0c6670
-		/*
0c6670
-		 * recalculate onwire_len based on crypto information
0c6670
-		 * and place it in the PMTUd packet info
0c6670
-		 */
0c6670
-		onwire_len = data_len + overhead_len;
0c6670
 		knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
0c6670
 
0c6670
 		if (crypto_encrypt_and_sign(knet_h,
0c6670
 					    (const unsigned char *)knet_h->pmtudbuf,
0c6670
-					    data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size),
0c6670
+					    data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size),
0c6670
 					    knet_h->pmtudbuf_crypt,
0c6670
 					    (ssize_t *)&data_len) < 0) {
0c6670
 			log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet");
0c6670
@@ -201,11 +132,8 @@ realign:
0c6670
 
0c6670
 		outbuf = knet_h->pmtudbuf_crypt;
0c6670
 		knet_h->stats_extra.tx_crypt_pmtu_packets++;
0c6670
-
0c6670
 	} else {
0c6670
-
0c6670
 		knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
0c6670
-
0c6670
 	}
0c6670
 
0c6670
 	/* link has gone down, aborting pmtud */
0c6670
@@ -417,7 +345,7 @@ retry:
0c6670
 				/*
0c6670
 				 * account for IP overhead, knet headers and crypto in PMTU calculation
0c6670
 				 */
0c6670
-				dst_link->status.mtu = onwire_len - dst_link->status.proto_overhead;
0c6670
+				dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len);
0c6670
 				pthread_mutex_unlock(&knet_h->pmtud_mutex);
0c6670
 				return 0;
0c6670
 			}
0c6670
@@ -437,7 +365,7 @@ retry:
0c6670
 	goto restart;
0c6670
 }
0c6670
 
0c6670
-static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, unsigned int *min_mtu, int force_run)
0c6670
+static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run)
0c6670
 {
0c6670
 	uint8_t saved_valid_pmtud;
0c6670
 	unsigned int saved_pmtud;
0c6670
@@ -455,17 +383,22 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
0c6670
 		timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud);
0c6670
 
0c6670
 		if (diff_pmtud < interval) {
0c6670
-			*min_mtu = dst_link->status.mtu;
0c6670
 			return dst_link->has_valid_mtu;
0c6670
 		}
0c6670
 	}
0c6670
 
0c6670
+	/*
0c6670
+	 * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers
0c6670
+	 *
0c6670
+	 * please note that it is not the same as link->proto_overhead that
0c6670
+	 * includes only either UDP or SCTP (at the moment) overhead.
0c6670
+	 */
0c6670
 	switch (dst_link->dst_addr.ss_family) {
0c6670
 		case AF_INET6:
0c6670
-			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size;
0c6670
+			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size;
0c6670
 			break;
0c6670
 		case AF_INET:
0c6670
-			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size;
0c6670
+			dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size;
0c6670
 			break;
0c6670
 	}
0c6670
 
0c6670
@@ -486,26 +419,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
0c6670
 		dst_link->has_valid_mtu = 0;
0c6670
 	} else {
0c6670
 		dst_link->has_valid_mtu = 1;
0c6670
-		switch (dst_link->dst_addr.ss_family) {
0c6670
-			case AF_INET6:
0c6670
-				if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V6) ||
0c6670
-				    ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V6)) {
0c6670
-					log_debug(knet_h, KNET_SUB_PMTUD,
0c6670
-						  "PMTUD detected an IPv6 MTU out of bound value (%u) for host: %u link: %u.",
0c6670
-						  dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id);
0c6670
-					dst_link->has_valid_mtu = 0;
0c6670
-				}
0c6670
-				break;
0c6670
-			case AF_INET:
0c6670
-				if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V4) ||
0c6670
-				    ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V4)) {
0c6670
-					log_debug(knet_h, KNET_SUB_PMTUD,
0c6670
-						  "PMTUD detected an IPv4 MTU out of bound value (%u) for host: %u link: %u.",
0c6670
-						  dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id);
0c6670
-					dst_link->has_valid_mtu = 0;
0c6670
-				}
0c6670
-				break;
0c6670
-		}
0c6670
 		if (dst_link->has_valid_mtu) {
0c6670
 			if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) {
0c6670
 				log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u",
0c6670
@@ -513,9 +426,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
0c6670
 			}
0c6670
 			log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u",
0c6670
 				  dst_host->host_id, dst_link->link_id, dst_link->status.mtu);
0c6670
-			if (dst_link->status.mtu < *min_mtu) {
0c6670
-				*min_mtu = dst_link->status.mtu;
0c6670
-			}
0c6670
 
0c6670
 			/*
0c6670
 			 * set pmtud_last, if we can, after we are done with the PMTUd process
0c6670
@@ -541,14 +451,14 @@ void *_handle_pmtud_link_thread(void *data)
0c6670
 	struct knet_host *dst_host;
0c6670
 	struct knet_link *dst_link;
0c6670
 	int link_idx;
0c6670
-	unsigned int min_mtu, have_mtu;
0c6670
+	unsigned int have_mtu;
0c6670
 	unsigned int lower_mtu;
0c6670
 	int link_has_mtu;
0c6670
 	int force_run = 0;
0c6670
 
0c6670
 	set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED);
0c6670
 
0c6670
-	knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
0c6670
+	knet_h->data_mtu = calc_min_mtu(knet_h);
0c6670
 
0c6670
 	/* preparing pmtu buffer */
0c6670
 	knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION;
0c6670
@@ -578,7 +488,6 @@ void *_handle_pmtud_link_thread(void *data)
0c6670
 		}
0c6670
 
0c6670
 		lower_mtu = KNET_PMTUD_SIZE_V4;
0c6670
-		min_mtu = KNET_PMTUD_SIZE_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
0c6670
 		have_mtu = 0;
0c6670
 
0c6670
 		for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) {
0c6670
@@ -593,14 +502,14 @@ void *_handle_pmtud_link_thread(void *data)
0c6670
 				     (dst_link->status.dynconnected != 1)))
0c6670
 					continue;
0c6670
 
0c6670
-				link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, &min_mtu, force_run);
0c6670
+				link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, force_run);
0c6670
 				if (errno == EDEADLK) {
0c6670
 					goto out_unlock;
0c6670
 				}
0c6670
 				if (link_has_mtu) {
0c6670
 					have_mtu = 1;
0c6670
-					if (min_mtu < lower_mtu) {
0c6670
-						lower_mtu = min_mtu;
0c6670
+					if (dst_link->status.mtu < lower_mtu) {
0c6670
+						lower_mtu = dst_link->status.mtu;
0c6670
 					}
0c6670
 				}
0c6670
 			}
0c6670
commit 499f589404db791d8e68c84c8ba3a857aeea5083
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Tue Aug 13 06:41:32 2019 +0200
0c6670
0c6670
    [PMTUd] add dynamic pong timeout when using crypto
0c6670
    
0c6670
    Problem originally reported by the Proxmox community: users
0c6670
    observed that under pressure the MTU would flap back and forth
0c6670
    between 2 values due to response timeouts from the other node.
0c6670
    
0c6670
    implement a dynamic timeout multiplier when using crypto that
0c6670
    should solve the problem in a more flexible fashion.
0c6670
    
0c6670
    When a timeout hits, the new log messages will show:
0c6670
    
0c6670
    [knet]: [info] host: host: 1 (passive) best link: 0 (pri: 0)
0c6670
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
0c6670
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (4) for host 1 link: 0
0c6670
    [knet]: [info] pmtud: PMTUD link change for host: 1 link: 0 from 469 to 65429
0c6670
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
0c6670
    [knet]: [info] pmtud: Global data MTU changed to: 65429
0c6670
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
0c6670
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (8) for host 1 link: 0
0c6670
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (16) for host 1 link: 0
0c6670
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (32) for host 1 link: 0
0c6670
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (64) for host 1 link: 0
0c6670
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
0c6670
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
0c6670
    [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (128) for host 1 link: 0
0c6670
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
0c6670
    
0c6670
    and when the latency reduces and it is safe to be more responsive again:
0c6670
    
0c6670
    [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
0c6670
    [knet]: [debug] pmtud: Decreasing PMTUd response timeout multiplier to (64) for host 1 link: 0
0c6670
    [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
0c6670
    
0c6670
    ....
0c6670
    
0c6670
    Testing this patch on normal hosts is a bit challenging, though.
0c6670
    
0c6670
    Patch was tested by hardcoding a super low timeout
0c6670
    and using a long-running version of api_knet_send_crypto_test with a short PMTUd setfreq (10 sec).
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/internals.h b/libknet/internals.h
0c6670
index 31840e4..d1a4757 100644
0c6670
--- a/libknet/internals.h
0c6670
+++ b/libknet/internals.h
0c6670
@@ -80,6 +80,7 @@ struct knet_link {
0c6670
 	uint32_t last_bad_mtu;
0c6670
 	uint32_t last_sent_mtu;
0c6670
 	uint32_t last_recv_mtu;
0c6670
+	uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */
0c6670
 	uint8_t has_valid_mtu;
0c6670
 };
0c6670
 
0c6670
diff --git a/libknet/links.c b/libknet/links.c
0c6670
index 03e0af9..f7eccc3 100644
0c6670
--- a/libknet/links.c
0c6670
+++ b/libknet/links.c
0c6670
@@ -219,6 +219,7 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
0c6670
 		}
0c6670
 	}
0c6670
 
0c6670
+	link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN;
0c6670
 	link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT;
0c6670
 	link->has_valid_mtu = 0;
0c6670
 	link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */
0c6670
diff --git a/libknet/links.h b/libknet/links.h
0c6670
index e14958d..c8ca610 100644
0c6670
--- a/libknet/links.h
0c6670
+++ b/libknet/links.h
0c6670
@@ -30,6 +30,16 @@
0c6670
  */
0c6670
 #define KNET_LINK_PONG_TIMEOUT_LAT_MUL	2
0c6670
 
0c6670
+/*
0c6670
+ * under heavy load with crypto enabled, it takes much
0c6670
+ * longer to receive a response from the other node.
0c6670
+ *
0c6670
+ * 128 is a somewhat arbitrary number, but we want to set a limit
0c6670
+ * and report failures after that.
0c6670
+ */
0c6670
+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN	  2
0c6670
+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX	128
0c6670
+
0c6670
 int _link_updown(knet_handle_t knet_h, knet_node_id_t node_id, uint8_t link_id,
0c6670
 		 unsigned int enabled, unsigned int connected);
0c6670
 
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index 1dd1788..d342697 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -36,8 +36,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
0c6670
 	size_t app_mtu_len;		/* real data that we can send onwire */
0c6670
 	ssize_t len;			/* len of what we were able to sendto onwire */
0c6670
 
0c6670
-	struct timespec ts;
0c6670
-	unsigned long long pong_timeout_adj_tmp;
0c6670
+	struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts;
0c6670
+	unsigned long long pong_timeout_adj_tmp, timediff;
0c6670
+	int pmtud_crypto_reduce = 1;
0c6670
 	unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf;
0c6670
 
0c6670
 	warn_once = 0;
0c6670
@@ -242,6 +243,15 @@ retry:
0c6670
 			return -1;
0c6670
 		}
0c6670
 
0c6670
+		/*
0c6670
+		 * non fatal, we can wait the next round to reduce the
0c6670
+		 * multiplier
0c6670
+		 */
0c6670
+		if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) {
0c6670
+			log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno));
0c6670
+			pmtud_crypto_reduce = 0;
0c6670
+		}
0c6670
+
0c6670
 		/*
0c6670
 		 * set PMTUd reply timeout to match pong_timeout on a given link
0c6670
 		 *
0c6670
@@ -261,7 +271,7 @@ retry:
0c6670
 			/*
0c6670
 			 * crypto, under pressure, is a royal PITA
0c6670
 			 */
0c6670
-			pong_timeout_adj_tmp = dst_link->pong_timeout_adj * 2;
0c6670
+			pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier;
0c6670
 		} else {
0c6670
 			pong_timeout_adj_tmp = dst_link->pong_timeout_adj;
0c6670
 		}
0c6670
@@ -295,6 +305,17 @@ retry:
0c6670
 
0c6670
 		if (ret) {
0c6670
 			if (ret == ETIMEDOUT) {
0c6670
+				if ((knet_h->crypto_instance) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) {
0c6670
+					dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2;
0c6670
+					pmtud_crypto_reduce = 0;
0c6670
+					log_debug(knet_h, KNET_SUB_PMTUD,
0c6670
+							"Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u",
0c6670
+							dst_link->pmtud_crypto_timeout_multiplier,
0c6670
+							dst_host->host_id,
0c6670
+							dst_link->link_id);
0c6670
+					pthread_mutex_unlock(&knet_h->pmtud_mutex);
0c6670
+					goto restart;
0c6670
+				}
0c6670
 				if (!warn_once) {
0c6670
 					log_warn(knet_h, KNET_SUB_PMTUD,
0c6670
 							"possible MTU misconfiguration detected. "
0c6670
@@ -323,6 +344,23 @@ retry:
0c6670
 			}
0c6670
 		}
0c6670
 
0c6670
+		if ((knet_h->crypto_instance) && (pmtud_crypto_reduce == 1) &&
0c6670
+		    (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) {
0c6670
+			if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) {
0c6670
+				timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff);
0c6670
+				if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) {
0c6670
+					dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2;
0c6670
+					log_debug(knet_h, KNET_SUB_PMTUD,
0c6670
+							"Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u",
0c6670
+							dst_link->pmtud_crypto_timeout_multiplier,
0c6670
+							dst_host->host_id,
0c6670
+							dst_link->link_id);
0c6670
+				}
0c6670
+			} else {
0c6670
+				log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno));
0c6670
+			}
0c6670
+		}
0c6670
+
0c6670
 		if ((dst_link->last_recv_mtu != onwire_len) || (ret)) {
0c6670
 			dst_link->last_bad_mtu = onwire_len;
0c6670
 		} else {
0c6670
commit 5f3476849523e9ee486481b429b471a1ab3cac20
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Thu Jul 18 07:50:37 2019 +0200
0c6670
0c6670
    [handle] make sure that the pmtud buf contains at least knet header size
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/handle.c b/libknet/handle.c
0c6670
index 4835e99..1fb9c9b 100644
0c6670
--- a/libknet/handle.c
0c6670
+++ b/libknet/handle.c
0c6670
@@ -234,14 +234,14 @@ static int _init_buffers(knet_handle_t knet_h)
0c6670
 	}
0c6670
 	memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE);
0c6670
 
0c6670
-	knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6);
0c6670
+	knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE);
0c6670
 	if (!knet_h->pmtudbuf) {
0c6670
 		savederrno = errno;
0c6670
 		log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s",
0c6670
 			strerror(savederrno));
0c6670
 		goto exit_fail;
0c6670
 	}
0c6670
-	memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6);
0c6670
+	memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE);
0c6670
 
0c6670
 	for (i = 0; i < PCKT_FRAG_MAX; i++) {
0c6670
 		bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD;
0c6670
commit 3b3b6d2a7e1fee7eb41c6bacc1005ff90f7dd5cb
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Thu Jul 18 10:23:14 2019 +0200
0c6670
0c6670
    [tests] fix knet_bench coverity errors
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c
0c6670
index dfe5238..dc04239 100644
0c6670
--- a/libknet/tests/knet_bench.c
0c6670
+++ b/libknet/tests/knet_bench.c
0c6670
@@ -277,22 +277,24 @@ static void setup_knet(int argc, char *argv[])
0c6670
 					printf("Error: -p can only be specified once\n");
0c6670
 					exit(FAIL);
0c6670
 				}
0c6670
-				policystr = optarg;
0c6670
-				if (!strcmp(policystr, "active")) {
0c6670
-					policy = KNET_LINK_POLICY_ACTIVE;
0c6670
-					policyfound = 1;
0c6670
-				}
0c6670
-				/*
0c6670
-				 * we can't use rr because clangs can't compile
0c6670
-				 * an array of 3 strings, one of which is 2 bytes long
0c6670
-				 */
0c6670
-				if (!strcmp(policystr, "round-robin")) {
0c6670
-					policy = KNET_LINK_POLICY_RR;
0c6670
-					policyfound = 1;
0c6670
-				}
0c6670
-				if (!strcmp(policystr, "passive")) {
0c6670
-					policy = KNET_LINK_POLICY_PASSIVE;
0c6670
-					policyfound = 1;
0c6670
+				if (optarg) {
0c6670
+					policystr = optarg;
0c6670
+					if (!strcmp(policystr, "active")) {
0c6670
+						policy = KNET_LINK_POLICY_ACTIVE;
0c6670
+						policyfound = 1;
0c6670
+					}
0c6670
+					/*
0c6670
+					 * we can't use rr because clangs can't compile
0c6670
+					 * an array of 3 strings, one of which is 2 bytes long
0c6670
+					 */
0c6670
+					if (!strcmp(policystr, "round-robin")) {
0c6670
+						policy = KNET_LINK_POLICY_RR;
0c6670
+						policyfound = 1;
0c6670
+					}
0c6670
+					if (!strcmp(policystr, "passive")) {
0c6670
+						policy = KNET_LINK_POLICY_PASSIVE;
0c6670
+						policyfound = 1;
0c6670
+					}
0c6670
 				}
0c6670
 				if (!policyfound) {
0c6670
 					printf("Error: invalid policy %s specified. -p accepts active|passive|rr\n", policystr);
0c6670
@@ -304,14 +306,16 @@ static void setup_knet(int argc, char *argv[])
0c6670
 					printf("Error: -P can only be specified once\n");
0c6670
 					exit(FAIL);
0c6670
 				}
0c6670
-				protostr = optarg;
0c6670
-				if (!strcmp(protostr, "UDP")) {
0c6670
-					protocol = KNET_TRANSPORT_UDP;
0c6670
-					protofound = 1;
0c6670
-				}
0c6670
-				if (!strcmp(protostr, "SCTP")) {
0c6670
-					protocol = KNET_TRANSPORT_SCTP;
0c6670
-					protofound = 1;
0c6670
+				if (optarg) {
0c6670
+					protostr = optarg;
0c6670
+					if (!strcmp(protostr, "UDP")) {
0c6670
+						protocol = KNET_TRANSPORT_UDP;
0c6670
+						protofound = 1;
0c6670
+					}
0c6670
+					if (!strcmp(protostr, "SCTP")) {
0c6670
+						protocol = KNET_TRANSPORT_SCTP;
0c6670
+						protofound = 1;
0c6670
+					}
0c6670
 				}
0c6670
 				if (!protofound) {
0c6670
 					printf("Error: invalid protocol %s specified. -P accepts udp|sctp\n", policystr);
0c6670
@@ -380,17 +384,22 @@ static void setup_knet(int argc, char *argv[])
0c6670
 				}
0c6670
 				break;
0c6670
 			case 'T':
0c6670
-				if (!strcmp("ping", optarg)) {
0c6670
-					test_type = TEST_PING;
0c6670
-				}
0c6670
-				if (!strcmp("ping_data", optarg)) {
0c6670
-					test_type = TEST_PING_AND_DATA;
0c6670
-				}
0c6670
-				if (!strcmp("perf-by-size", optarg)) {
0c6670
-					test_type = TEST_PERF_BY_SIZE;
0c6670
-				}
0c6670
-				if (!strcmp("perf-by-time", optarg)) {
0c6670
-					test_type = TEST_PERF_BY_TIME;
0c6670
+				if (optarg) {
0c6670
+					if (!strcmp("ping", optarg)) {
0c6670
+						test_type = TEST_PING;
0c6670
+					}
0c6670
+					if (!strcmp("ping_data", optarg)) {
0c6670
+						test_type = TEST_PING_AND_DATA;
0c6670
+					}
0c6670
+					if (!strcmp("perf-by-size", optarg)) {
0c6670
+						test_type = TEST_PERF_BY_SIZE;
0c6670
+					}
0c6670
+					if (!strcmp("perf-by-time", optarg)) {
0c6670
+						test_type = TEST_PERF_BY_TIME;
0c6670
+					}
0c6670
+				} else {
0c6670
+					printf("Error: -T requires an option\n");
0c6670
+					exit(FAIL);
0c6670
 				}
0c6670
 				break;
0c6670
 			case 'S':
0c6670
@@ -957,15 +966,14 @@ static void display_stats(int level)
0c6670
 	struct knet_link_stats total_link_stats;
0c6670
 	knet_node_id_t host_list[KNET_MAX_HOST];
0c6670
 	uint8_t link_list[KNET_MAX_LINK];
0c6670
-	int res;
0c6670
 	unsigned int i,j;
0c6670
 	size_t num_hosts, num_links;
0c6670
 
0c6670
-	res = knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats));
0c6670
-	if (res) {
0c6670
+	if (knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)) < 0) {
0c6670
 		perror("[info]: failed to get knet handle stats");
0c6670
 		return;
0c6670
 	}
0c6670
+
0c6670
 	if (compresscfg || cryptocfg) {
0c6670
 		printf("\n");
0c6670
 		printf("[stat]: handle stats\n");
0c6670
@@ -1005,8 +1013,7 @@ static void display_stats(int level)
0c6670
 
0c6670
 	memset(&total_link_stats, 0, sizeof(struct knet_link_stats));
0c6670
 
0c6670
-	res = knet_host_get_host_list(knet_h, host_list, &num_hosts);
0c6670
-	if (res) {
0c6670
+	if (knet_host_get_host_list(knet_h, host_list, &num_hosts) < 0) {
0c6670
 		perror("[info]: cannot get host list for stats");
0c6670
 		return;
0c6670
 	}
0c6670
@@ -1015,18 +1022,16 @@ static void display_stats(int level)
0c6670
 	qsort(host_list, num_hosts, sizeof(uint16_t), node_compare);
0c6670
 
0c6670
 	for (j=0; j<num_hosts; j++) {
0c6670
-		res = knet_link_get_link_list(knet_h, host_list[j], link_list, &num_links);
0c6670
-		if (res) {
0c6670
+		if (knet_link_get_link_list(knet_h, host_list[j], link_list, &num_links) < 0) {
0c6670
 			perror("[info]: cannot get link list for stats");
0c6670
 			return;
0c6670
 		}
0c6670
 
0c6670
 		for (i=0; i < num_links; i++) {
0c6670
-			res = knet_link_get_status(knet_h,
0c6670
-						   host_list[j],
0c6670
-						   link_list[i],
0c6670
-						   &link_status,
0c6670
-						   sizeof(link_status));
0c6670
+			if (knet_link_get_status(knet_h, host_list[j], link_list[i], &link_status, sizeof(link_status)) < 0) {
0c6670
+				perror("[info]: cannot get link status");
0c6670
+				return;
0c6670
+			}
0c6670
 
0c6670
 			total_link_stats.tx_data_packets += link_status.stats.tx_data_packets;
0c6670
 			total_link_stats.rx_data_packets += link_status.stats.rx_data_packets;
0c6670
commit d74380a82c00716aafb780f5602182fce90d381f
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Wed Jul 24 08:38:56 2019 +0200
0c6670
0c6670
    [PMTUd] do not double unlock global read lock
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index d342697..f884760 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -297,7 +297,11 @@ retry:
0c6670
 			return -1;
0c6670
 		}
0c6670
 
0c6670
-		if (shutdown_in_progress(knet_h)) {
0c6670
+		/*
0c6670
+		 * we cannot use shutdown_in_progress in here because
0c6670
+		 * we already hold the read lock
0c6670
+		 */
0c6670
+		if (knet_h->fini_in_progress) {
0c6670
 			pthread_mutex_unlock(&knet_h->pmtud_mutex);
0c6670
 			log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress");
0c6670
 			return -1;
0c6670
commit 01242c683b18b813a67c13d3fc0546fec34f9f7c
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Mon Sep 9 15:11:25 2019 +0200
0c6670
0c6670
    [pmtud] switch to use async version of dstcache update due to locking context (read vs write)
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index f884760..d10984f 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -481,7 +481,7 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
0c6670
 	}
0c6670
 
0c6670
 	if (saved_valid_pmtud != dst_link->has_valid_mtu) {
0c6670
-		_host_dstcache_update_sync(knet_h, dst_host);
0c6670
+		_host_dstcache_update_async(knet_h, dst_host);
0c6670
 	}
0c6670
 
0c6670
 	return dst_link->has_valid_mtu;
0c6670
commit a70f0adf0d4d38ed614bf2eef1a4e66fec2f2c92
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Fri Sep 13 07:28:55 2019 +0200
0c6670
0c6670
    [tests] fix ip generation boundaries
0c6670
    
0c6670
    https://ci.kronosnet.org/job/knet-build-all-voting/1450/knet-build-all-voting=rhel80z-s390x/console
0c6670
    
0c6670
    and similar, when pid = 255, the secondary IP would hit 256 that is of course invalid.
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libnozzle/tests/test-common.c b/libnozzle/tests/test-common.c
0c6670
index b36be79..3afd2ec 100644
0c6670
--- a/libnozzle/tests/test-common.c
0c6670
+++ b/libnozzle/tests/test-common.c
0c6670
@@ -124,7 +124,7 @@ void make_local_ips(char *testipv4_1, char *testipv4_2, char *testipv6_1, char *
0c6670
 	pid = (uint8_t *)&mypid;
0c6670
 
0c6670
 	for (i = 0; i < sizeof(pid_t); i++) {
0c6670
-		if (pid[i] == 0) {
0c6670
+		if ((pid[i] == 0) || (pid[i] == 255)) {
0c6670
 			pid[i] = 128;
0c6670
 		}
0c6670
 	}
0c6670
commit 63567e1e6b6ebb91fe1df43b910d6b9bd78d528f
0c6670
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
Date:   Tue Oct 15 11:53:56 2019 +0200
0c6670
0c6670
    [PMTUd] invalidate MTU for a link if the value is lower than minimum
0c6670
    
0c6670
    Under heavy network load and packet loss, calculated MTU can be
0c6670
    too small. In that case we need to invalidate the link mtu,
0c6670
    that would remove the link from the rotation (and traffic) and
0c6670
    would give PMTUd time to get the right MTU in the next round.
0c6670
    
0c6670
    Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
0c6670
0c6670
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
0c6670
index d10984f..ab00b47 100644
0c6670
--- a/libknet/threads_pmtud.c
0c6670
+++ b/libknet/threads_pmtud.c
0c6670
@@ -460,7 +460,14 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
0c6670
 		}
0c6670
 		dst_link->has_valid_mtu = 0;
0c6670
 	} else {
0c6670
-		dst_link->has_valid_mtu = 1;
0c6670
+		if (dst_link->status.mtu < calc_min_mtu(knet_h)) {
0c6670
+			log_info(knet_h, KNET_SUB_PMTUD,
0c6670
+				 "Invalid MTU detected for host: %u link: %u mtu: %u",
0c6670
+				 dst_host->host_id, dst_link->link_id, dst_link->status.mtu);
0c6670
+			dst_link->has_valid_mtu = 0;
0c6670
+		} else {
0c6670
+			dst_link->has_valid_mtu = 1;
0c6670
+		}
0c6670
 		if (dst_link->has_valid_mtu) {
0c6670
 			if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) {
0c6670
 				log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u",