diff --git a/SOURCES/bz1736872-fix-mtu-calculation.patch b/SOURCES/bz1736872-fix-mtu-calculation.patch new file mode 100644 index 0000000..f4c3fb2 --- /dev/null +++ b/SOURCES/bz1736872-fix-mtu-calculation.patch @@ -0,0 +1,1751 @@ +commit b67c63101246b400c7512cb1adbc590ac06cb6ee +Author: Fabio M. Di Nitto +Date: Tue Jul 30 11:18:33 2019 +0200 + + [crypto] fix log information + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/crypto.c b/libknet/crypto.c +index 9f05fba..9d6757b 100644 +--- a/libknet/crypto.c ++++ b/libknet/crypto.c +@@ -151,8 +151,6 @@ int crypto_init( + goto out; + } + +- log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size); +- + out: + if (!err) { + knet_h->crypto_instance = new; +@@ -161,6 +159,8 @@ out: + knet_h->sec_hash_size = new->sec_hash_size; + knet_h->sec_salt_size = new->sec_salt_size; + ++ log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size); ++ + if (current) { + if (crypto_modules_cmds[current->model].ops->fini != NULL) { + crypto_modules_cmds[current->model].ops->fini(knet_h, current); +commit a89c2cd6d3863abe0f3ae0165239177a7461ee5e +Author: Fabio M. Di Nitto +Date: Wed Jul 31 14:15:07 2019 +0200 + + [udp] log information about detected kernel MTU + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c +index 53d2ba0..be990bb 100644 +--- a/libknet/transport_udp.c ++++ b/libknet/transport_udp.c +@@ -337,6 +337,7 @@ static int read_errs_from_sock(knet_handle_t knet_h, int sockfd) + break; + } else { + knet_h->kernel_mtu = sock_err->ee_info; ++ log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu); + pthread_mutex_unlock(&knet_h->kmtu_mutex); + } + +commit 650ef6d26e83dd7827b2e913c52a1fac67ea60d4 +Author: Fabio M. Di Nitto +Date: Fri Aug 2 10:43:09 2019 +0200 + + [docs] add knet packet layout + + Signed-off-by: Fabio M. 
Di Nitto + +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index 603f595..2cd48f9 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -91,6 +91,28 @@ restart: + failsafe++; + } + ++ /* ++ * unencrypted packet looks like: ++ * ++ * | ip | protocol | knet_header | unencrypted data | ++ * | onwire_len | ++ * | overhead_len | ++ * | data_len | ++ * | app MTU | ++ * ++ * encrypted packet looks like (not to scale): ++ * ++ * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | ++ * | onwire_len | ++ * | overhead_len | ++ * | data_len | ++ * | app MTU | ++ * ++ * knet_h->sec_block_size is >= 0 if encryption will pad the data ++ * knet_h->sec_salt_size is >= 0 if encryption is enabled ++ * knet_h->sec_hash_size is >= 0 if signing is enabled ++ */ ++ + data_len = onwire_len - overhead_len; + + if (knet_h->crypto_instance) { +commit dbed772f0cb9070826eac6524646bd2ea7cce8c0 +Author: Fabio M. Di Nitto +Date: Fri Aug 2 10:44:23 2019 +0200 + + [PMTUd] fix MTU calculation when using crypto and add docs + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index 2cd48f9..1a19806 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -113,29 +113,68 @@ restart: + * knet_h->sec_hash_size is >= 0 if signing is enabled + */ + ++ /* ++ * common to all packets ++ */ + data_len = onwire_len - overhead_len; + + if (knet_h->crypto_instance) { + ++realign: + if (knet_h->sec_block_size) { ++ ++ /* ++ * drop both salt and hash, that leaves only the crypto data and padding ++ * we need to calculate the padding based on the real encrypted data. ++ */ ++ data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size); ++ ++ /* ++ * if the crypto mechanism requires padding, calculate the padding ++ * and add it back to data_len because that's what the crypto layer ++ * would do. 
++ */ + pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size); ++ ++ /* ++ * if are at the boundary, reset padding ++ */ + if (pad_len == knet_h->sec_block_size) { + pad_len = 0; + } + data_len = data_len + pad_len; +- } + +- data_len = data_len + (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size); +- +- if (knet_h->sec_block_size) { ++ /* ++ * if our current data_len is higher than max_mtu_len ++ * then we need to reduce by padding size (that is our ++ * increment / decrement value) ++ * ++ * this generally happens only on the first PMTUd run ++ */ + while (data_len + overhead_len >= max_mtu_len) { + data_len = data_len - knet_h->sec_block_size; + } ++ ++ /* ++ * add both hash and salt size back, similar to padding above, ++ * the crypto layer will add them to the data_len ++ */ ++ data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size); + } + + if (dst_link->last_bad_mtu) { +- while (data_len + overhead_len >= dst_link->last_bad_mtu) { +- data_len = data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size); ++ if (data_len + overhead_len >= dst_link->last_bad_mtu) { ++ /* ++ * reduce data_len to something lower than last_bad_mtu, overhead_len ++ * and sec_block_size (decrementing step) - 1 (granularity) ++ */ ++ data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1; ++ if (knet_h->sec_block_size) { ++ /* ++ * make sure that data_len is aligned to the sec_block_size boundary ++ */ ++ goto realign; ++ } + } + } + +@@ -144,6 +183,10 @@ restart: + return -1; + } + ++ /* ++ * recalculate onwire_len based on crypto information ++ * and place it in the PMTUd packet info ++ */ + onwire_len = data_len + overhead_len; + knet_h->pmtudbuf->khp_pmtud_size = onwire_len; + +commit a9460c72fafe452b7cb584598aa43a87b44428f0 +Author: Fabio M. 
Di Nitto +Date: Mon Aug 12 16:52:59 2019 +0200 + + [PMTUd] rework the whole math to calculate MTU + + internal changes: + - drop the concept of sec_header_size that was completely wrong + and unnecessary + - bump crypto API to version 3 due to the above change + - clarify the difference between link->proto_overhead and + link->status->proto_overhead. We cannot rename the status + one as it would also change ABI. + - add onwire.c with documentation on the packet format + and what various len(s) mean in context. + - add 3 new functions to calculate MTUs back and forth + and use them around, hopefully with enough clarification + on why things are done in a given way. + - heavily change thread_pmtud.c to use those new facilities. + - fix major calculation issues when using crypto (non-crypto + was not affected by the problem). + - fix checks around to make sure they match the new math. + - fix padding calculation. + - add functional PMTUd crypto test + this test can take several hours (12+) and should be executed + on a controlled environment since it automatically changes + loopback MTU to run tests. + - fix way the lowest MTU is calculated during a PMTUd run + to avoid spurious double notifications. + - drop redundant checks. + + user visible changes: + - Global MTU is now calculated properly when using crypto + and values will be in general bigger than before due + to incorrect padding calculation in the previous implementation. + + Signed-off-by: Fabio M. 
Di Nitto + +diff --git a/libknet/Makefile.am b/libknet/Makefile.am +index d080732..2fa2416 100644 +--- a/libknet/Makefile.am ++++ b/libknet/Makefile.am +@@ -36,6 +36,7 @@ sources = \ + links_acl_loopback.c \ + logging.c \ + netutils.c \ ++ onwire.c \ + threads_common.c \ + threads_dsthandler.c \ + threads_heartbeat.c \ +diff --git a/libknet/crypto.c b/libknet/crypto.c +index 9d6757b..afa4f88 100644 +--- a/libknet/crypto.c ++++ b/libknet/crypto.c +@@ -154,12 +154,14 @@ int crypto_init( + out: + if (!err) { + knet_h->crypto_instance = new; +- knet_h->sec_header_size = new->sec_header_size; + knet_h->sec_block_size = new->sec_block_size; + knet_h->sec_hash_size = new->sec_hash_size; + knet_h->sec_salt_size = new->sec_salt_size; + +- log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size); ++ log_debug(knet_h, KNET_SUB_CRYPTO, "Hash size: %zu salt size: %zu block size: %zu", ++ knet_h->sec_hash_size, ++ knet_h->sec_salt_size, ++ knet_h->sec_block_size); + + if (current) { + if (crypto_modules_cmds[current->model].ops->fini != NULL) { +@@ -195,7 +197,6 @@ void crypto_fini( + crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini(knet_h, knet_h->crypto_instance); + } + free(knet_h->crypto_instance); +- knet_h->sec_header_size = 0; + knet_h->sec_block_size = 0; + knet_h->sec_hash_size = 0; + knet_h->sec_salt_size = 0; +diff --git a/libknet/crypto_model.h b/libknet/crypto_model.h +index 70f6238..b05e49c 100644 +--- a/libknet/crypto_model.h ++++ b/libknet/crypto_model.h +@@ -14,13 +14,12 @@ + struct crypto_instance { + int model; + void *model_instance; +- size_t sec_header_size; + size_t sec_block_size; + size_t sec_hash_size; + size_t sec_salt_size; + }; + +-#define KNET_CRYPTO_MODEL_ABI 2 ++#define KNET_CRYPTO_MODEL_ABI 3 + + /* + * see compress_model.h for explanation of the various lib related functions +diff --git a/libknet/crypto_nss.c b/libknet/crypto_nss.c +index 330b40c..c624a47 100644 +--- a/libknet/crypto_nss.c ++++ 
b/libknet/crypto_nss.c +@@ -801,10 +801,7 @@ static int nsscrypto_init( + goto out_err; + } + +- crypto_instance->sec_header_size = 0; +- + if (nsscrypto_instance->crypto_hash_type > 0) { +- crypto_instance->sec_header_size += nsshash_len[nsscrypto_instance->crypto_hash_type]; + crypto_instance->sec_hash_size = nsshash_len[nsscrypto_instance->crypto_hash_type]; + } + +@@ -821,8 +818,6 @@ static int nsscrypto_init( + } + } + +- crypto_instance->sec_header_size += (block_size * 2); +- crypto_instance->sec_header_size += SALT_SIZE; + crypto_instance->sec_salt_size = SALT_SIZE; + crypto_instance->sec_block_size = block_size; + } +diff --git a/libknet/crypto_openssl.c b/libknet/crypto_openssl.c +index 0cbc6f5..6571498 100644 +--- a/libknet/crypto_openssl.c ++++ b/libknet/crypto_openssl.c +@@ -566,11 +566,8 @@ static int opensslcrypto_init( + memmove(opensslcrypto_instance->private_key, knet_handle_crypto_cfg->private_key, knet_handle_crypto_cfg->private_key_len); + opensslcrypto_instance->private_key_len = knet_handle_crypto_cfg->private_key_len; + +- crypto_instance->sec_header_size = 0; +- + if (opensslcrypto_instance->crypto_hash_type) { + crypto_instance->sec_hash_size = EVP_MD_size(opensslcrypto_instance->crypto_hash_type); +- crypto_instance->sec_header_size += crypto_instance->sec_hash_size; + } + + if (opensslcrypto_instance->crypto_cipher_type) { +@@ -578,8 +575,6 @@ static int opensslcrypto_init( + + block_size = EVP_CIPHER_block_size(opensslcrypto_instance->crypto_cipher_type); + +- crypto_instance->sec_header_size += (block_size * 2); +- crypto_instance->sec_header_size += SALT_SIZE; + crypto_instance->sec_salt_size = SALT_SIZE; + crypto_instance->sec_block_size = block_size; + } +diff --git a/libknet/internals.h b/libknet/internals.h +index 3f105a1..31840e4 100644 +--- a/libknet/internals.h ++++ b/libknet/internals.h +@@ -71,7 +71,9 @@ struct knet_link { + uint8_t received_pong; + struct timespec ping_last; + /* used by PMTUD thread as temp per-link 
variables and should always contain the onwire_len value! */ +- uint32_t proto_overhead; ++ uint32_t proto_overhead; /* IP + UDP/SCTP overhead. NOT to be confused ++ with stats.proto_overhead that includes also knet headers ++ and crypto headers */ + struct timespec pmtud_last; + uint32_t last_ping_size; + uint32_t last_good_mtu; +@@ -197,7 +199,6 @@ struct knet_handle { + int pmtud_forcerun; + int pmtud_abort; + struct crypto_instance *crypto_instance; +- size_t sec_header_size; + size_t sec_block_size; + size_t sec_hash_size; + size_t sec_salt_size; +diff --git a/libknet/links.c b/libknet/links.c +index 51ead5a..03e0af9 100644 +--- a/libknet/links.c ++++ b/libknet/links.c +@@ -265,7 +265,32 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l + host->status.reachable = 1; + link->status.mtu = KNET_PMTUD_SIZE_V6; + } else { +- link->status.mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; ++ /* ++ * calculate the minimum MTU that is safe to use, ++ * based on RFCs and that each network device should ++ * be able to support without any troubles ++ */ ++ if (link->dynamic == KNET_LINK_STATIC) { ++ /* ++ * with static link we can be more precise than using ++ * the generic calc_min_mtu() ++ */ ++ switch (link->dst_addr.ss_family) { ++ case AF_INET6: ++ link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead)); ++ break; ++ case AF_INET: ++ link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead)); ++ break; ++ } ++ } else { ++ /* ++ * for dynamic links we start with the minimum MTU ++ * possible and PMTUd will kick in immediately ++ * after connection status is 1 ++ */ ++ link->status.mtu = calc_min_mtu(knet_h); ++ } + link->has_valid_mtu = 1; + } + +diff --git a/libknet/onwire.c b/libknet/onwire.c +new file mode 100644 +index 0000000..143ac4b +--- /dev/null ++++ b/libknet/onwire.c +@@ 
-0,0 +1,127 @@ ++/* ++ * Copyright (C) 2019 Red Hat, Inc. All rights reserved. ++ * ++ * Author: Fabio M. Di Nitto ++ * ++ * This software licensed under LGPL-2.0+ ++ */ ++ ++#include "config.h" ++ ++#include ++#include ++#include ++ ++#include "crypto.h" ++#include "internals.h" ++#include "logging.h" ++#include "common.h" ++#include "transport_udp.h" ++#include "transport_sctp.h" ++ ++/* ++ * unencrypted packet looks like: ++ * ++ * | ip | protocol | knet_header | unencrypted data | ++ * | onwire_len | ++ * | proto_overhead | ++ * | data_len | ++ * | app MTU | ++ * ++ * encrypted packet looks like (not to scale): ++ * ++ * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | ++ * | onwire_len | ++ * | proto_overhead | ++ * | data_len | ++ * | app MTU | ++ * ++ * knet_h->sec_block_size is >= 0 if encryption will pad the data ++ * knet_h->sec_salt_size is >= 0 if encryption is enabled ++ * knet_h->sec_hash_size is >= 0 if signing is enabled ++ */ ++ ++/* ++ * this function takes in the data that we would like to send ++ * and tells us the outgoing onwire data size with crypto and ++ * all the headers adjustment. ++ * calling thread needs to account for protocol overhead. ++ */ ++ ++size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen) ++{ ++ size_t outlen = inlen, pad_len = 0; ++ ++ if (knet_h->sec_block_size) { ++ /* ++ * if the crypto mechanism requires padding, calculate the padding ++ * and add it back to outlen because that's what the crypto layer ++ * would do. ++ */ ++ pad_len = knet_h->sec_block_size - (outlen % knet_h->sec_block_size); ++ ++ outlen = outlen + pad_len; ++ } ++ ++ return outlen + knet_h->sec_salt_size + knet_h->sec_hash_size; ++} ++ ++/* ++ * this function takes in the data that we would like to send ++ * and tells us what is the real maximum data we can send ++ * accounting for headers and crypto ++ * calling thread needs to account for protocol overhead. 
++ */ ++ ++size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen) ++{ ++ size_t outlen = inlen, pad_len = 0; ++ ++ if (knet_h->sec_block_size) { ++ /* ++ * drop both salt and hash, that leaves only the crypto data and padding ++ * we need to calculate the padding based on the real encrypted data ++ * that includes the knet_header. ++ */ ++ outlen = outlen - (knet_h->sec_salt_size + knet_h->sec_hash_size); ++ ++ /* ++ * if the crypto mechanism requires padding, calculate the padding ++ * and remove it, to align the data. ++ * NOTE: we need to remove pad_len + 1 because, based on testing, ++ * if we send data that are already aligned to block_size, the ++ * crypto implementations will add another block_size! ++ * so we want to make sure that our data won't add an unnecessary ++ * block_size that we need to remove later. ++ */ ++ pad_len = outlen % knet_h->sec_block_size; ++ ++ outlen = outlen - (pad_len + 1); ++ ++ /* ++ * add both hash and salt size back, similar to padding above, ++ * the crypto layer will add them to the outlen ++ */ ++ outlen = outlen + (knet_h->sec_salt_size + knet_h->sec_hash_size); ++ } ++ ++ /* ++ * drop KNET_HEADER_ALL_SIZE to provide a clean application MTU ++ * and various crypto headers ++ */ ++ outlen = outlen - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size); ++ ++ return outlen; ++} ++ ++/* ++ * set the lowest possible value as failsafe for all links. 
++ * KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6 ++ * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4 ++ * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD ++ */ ++ ++size_t calc_min_mtu(knet_handle_t knet_h) ++{ ++ return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD)); ++} +diff --git a/libknet/onwire.h b/libknet/onwire.h +index 9815bc3..74d4d09 100644 +--- a/libknet/onwire.h ++++ b/libknet/onwire.h +@@ -120,7 +120,9 @@ struct knet_header_payload_ping { + #define KNET_PMTUD_SIZE_V4 65535 + #define KNET_PMTUD_SIZE_V6 KNET_PMTUD_SIZE_V4 + +-/* These two get the protocol-specific overheads added to them */ ++/* ++ * IPv4/IPv6 header size ++ */ + #define KNET_PMTUD_OVERHEAD_V4 20 + #define KNET_PMTUD_OVERHEAD_V6 40 + +@@ -199,4 +201,8 @@ struct knet_header { + #define KNET_HEADER_PMTUD_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud)) + #define KNET_HEADER_DATA_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data)) + ++size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen); ++size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen); ++size_t calc_min_mtu(knet_handle_t knet_h); ++ + #endif +diff --git a/libknet/tests/Makefile.am b/libknet/tests/Makefile.am +index 3346596..9160780 100644 +--- a/libknet/tests/Makefile.am ++++ b/libknet/tests/Makefile.am +@@ -38,6 +38,12 @@ int_checks = \ + + fun_checks = + ++# checks below need to be executed manually ++# or with a specifi environment ++ ++long_run_checks = \ ++ fun_pmtud_crypto_test ++ + benchmarks = \ + knet_bench_test + +@@ -45,6 +51,7 @@ noinst_PROGRAMS = \ + api_knet_handle_new_limit_test \ + pckt_test \ + $(benchmarks) \ ++ $(long_run_checks) \ + $(check_PROGRAMS) + + noinst_SCRIPTS = \ +@@ -71,6 +78,7 @@ int_links_acl_ip_test_SOURCES = int_links_acl_ip.c \ + ../logging.c \ + ../netutils.c \ + ../threads_common.c \ ++ ../onwire.c \ + ../transports.c \ + ../transport_common.c \ + ../transport_loopback.c \ +@@ -88,4 
+96,9 @@ knet_bench_test_SOURCES = knet_bench.c \ + ../logging.c \ + ../compat.c \ + ../transport_common.c \ +- ../threads_common.c ++ ../threads_common.c \ ++ ../onwire.c ++ ++fun_pmtud_crypto_test_SOURCES = fun_pmtud_crypto.c \ ++ test-common.c \ ++ ../onwire.c +diff --git a/libknet/tests/api_knet_send_crypto.c b/libknet/tests/api_knet_send_crypto.c +index 11de857..5fc5463 100644 +--- a/libknet/tests/api_knet_send_crypto.c ++++ b/libknet/tests/api_knet_send_crypto.c +@@ -67,7 +67,7 @@ static void test(const char *model) + memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg)); + strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1); + strncpy(knet_handle_crypto_cfg.crypto_cipher_type, "aes128", sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1); +- strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha1", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); ++ strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha256", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); + knet_handle_crypto_cfg.private_key_len = 2000; + + if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) { +diff --git a/libknet/tests/fun_pmtud_crypto.c b/libknet/tests/fun_pmtud_crypto.c +new file mode 100644 +index 0000000..91c062c +--- /dev/null ++++ b/libknet/tests/fun_pmtud_crypto.c +@@ -0,0 +1,326 @@ ++/* ++ * Copyright (C) 2019 Red Hat, Inc. All rights reserved. ++ * ++ * Authors: Fabio M. 
Di Nitto ++ * ++ * This software licensed under GPL-2.0+ ++ */ ++ ++#include "config.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "libknet.h" ++ ++#include "compress.h" ++#include "internals.h" ++#include "netutils.h" ++#include "onwire.h" ++#include "test-common.h" ++ ++static int private_data; ++ ++static void sock_notify(void *pvt_data, ++ int datafd, ++ int8_t channel, ++ uint8_t tx_rx, ++ int error, ++ int errorno) ++{ ++ return; ++} ++ ++static int iface_fd = 0; ++static int default_mtu = 0; ++ ++#ifdef KNET_LINUX ++const char *loopback = "lo"; ++#endif ++#ifdef KNET_BSD ++const char *loopback = "lo0"; ++#endif ++ ++static int fd_init(void) ++{ ++#ifdef KNET_LINUX ++ return socket(AF_INET, SOCK_STREAM, 0); ++#endif ++#ifdef KNET_BSD ++ return socket(AF_LOCAL, SOCK_DGRAM, 0); ++#endif ++ return -1; ++} ++ ++static int set_iface_mtu(uint32_t mtu) ++{ ++ int err = 0; ++ struct ifreq ifr; ++ ++ memset(&ifr, 0, sizeof(struct ifreq)); ++ strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1); ++ ifr.ifr_mtu = mtu; ++ ++ err = ioctl(iface_fd, SIOCSIFMTU, &ifr); ++ ++ return err; ++} ++ ++static int get_iface_mtu(void) ++{ ++ int err = 0, savederrno = 0; ++ struct ifreq ifr; ++ ++ memset(&ifr, 0, sizeof(struct ifreq)); ++ strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1); ++ ++ err = ioctl(iface_fd, SIOCGIFMTU, &ifr); ++ if (err) { ++ savederrno = errno; ++ goto out_clean; ++ } ++ ++ err = ifr.ifr_mtu; ++ ++out_clean: ++ errno = savederrno; ++ return err; ++} ++ ++static int exit_local(int code) ++{ ++ set_iface_mtu(default_mtu); ++ close(iface_fd); ++ iface_fd = 0; ++ exit(code); ++} ++ ++static void test_mtu(const char *model, const char *crypto, const char *hash) ++{ ++ knet_handle_t knet_h; ++ int logfds[2]; ++ int datafd = 0; ++ int8_t channel = 0; ++ struct sockaddr_storage lo; ++ struct knet_handle_crypto_cfg knet_handle_crypto_cfg; ++ unsigned int data_mtu, expected_mtu; ++ size_t 
calculated_iface_mtu = 0, detected_iface_mtu = 0; ++ ++ if (make_local_sockaddr(&lo, 0) < 0) { ++ printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno)); ++ exit_local(FAIL); ++ } ++ ++ setup_logpipes(logfds); ++ ++ knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG); ++ ++ flush_logs(logfds[0], stdout); ++ ++ printf("Test knet_send with %s and valid data\n", model); ++ ++ memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg)); ++ strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1); ++ strncpy(knet_handle_crypto_cfg.crypto_cipher_type, crypto, sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1); ++ strncpy(knet_handle_crypto_cfg.crypto_hash_type, hash, sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); ++ knet_handle_crypto_cfg.private_key_len = 2000; ++ ++ if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) { ++ printf("knet_handle_crypto failed with correct config: %s\n", strerror(errno)); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) { ++ printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno)); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ datafd = 0; ++ channel = -1; ++ ++ if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) { ++ printf("knet_handle_add_datafd failed: %s\n", strerror(errno)); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if (knet_host_add(knet_h, 1) < 0) { ++ printf("knet_host_add failed: %s\n", strerror(errno)); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if (knet_link_set_config(knet_h, 1, 0, KNET_TRANSPORT_UDP, &lo, &lo, 0) < 0) { ++ printf("Unable to 
configure link: %s\n", strerror(errno)); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if (knet_link_set_pong_count(knet_h, 1, 0, 1) < 0) { ++ printf("knet_link_set_pong_count failed: %s\n", strerror(errno)); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if (knet_link_set_enable(knet_h, 1, 0, 1) < 0) { ++ printf("knet_link_set_enable failed: %s\n", strerror(errno)); ++ knet_link_clear_config(knet_h, 1, 0); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if (wait_for_host(knet_h, 1, 4, logfds[0], stdout) < 0) { ++ printf("timeout waiting for host to be reachable"); ++ knet_link_set_enable(knet_h, 1, 0, 0); ++ knet_link_clear_config(knet_h, 1, 0); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ flush_logs(logfds[0], stdout); ++ ++ if (knet_handle_pmtud_get(knet_h, &data_mtu) < 0) { ++ printf("knet_handle_pmtud_get failed error: %s\n", strerror(errno)); ++ knet_link_set_enable(knet_h, 1, 0, 0); ++ knet_link_clear_config(knet_h, 1, 0); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ calculated_iface_mtu = calc_data_outlen(knet_h, data_mtu + KNET_HEADER_ALL_SIZE) + 28; ++ detected_iface_mtu = get_iface_mtu(); ++ /* ++ * 28 = 20 IP header + 8 UDP header ++ */ ++ expected_mtu = calc_max_data_outlen(knet_h, detected_iface_mtu - 28); ++ ++ if (expected_mtu != data_mtu) { ++ printf("Wrong MTU detected! 
interface mtu: %zu knet mtu: %u expected mtu: %u\n", detected_iface_mtu, data_mtu, expected_mtu); ++ knet_link_set_enable(knet_h, 1, 0, 0); ++ knet_link_clear_config(knet_h, 1, 0); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ if ((detected_iface_mtu - calculated_iface_mtu) >= knet_h->sec_block_size) { ++ printf("Wrong MTU detected! real iface mtu: %zu calculated: %zu\n", detected_iface_mtu, calculated_iface_mtu); ++ knet_link_set_enable(knet_h, 1, 0, 0); ++ knet_link_clear_config(knet_h, 1, 0); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++ exit_local(FAIL); ++ } ++ ++ knet_link_set_enable(knet_h, 1, 0, 0); ++ knet_link_clear_config(knet_h, 1, 0); ++ knet_host_remove(knet_h, 1); ++ knet_handle_free(knet_h); ++ flush_logs(logfds[0], stdout); ++ close_logpipes(logfds); ++} ++ ++static void test(const char *model, const char *crypto, const char *hash) ++{ ++ int i = 576; ++ int max = 65535; ++ ++ while (i <= max) { ++ printf("Setting interface MTU to: %i\n", i); ++ set_iface_mtu(i); ++ test_mtu(model, crypto, hash); ++ if (i == max) { ++ break; ++ } ++ i = i + 15; ++ if (i > max) { ++ i = max; ++ } ++ } ++} ++ ++int main(int argc, char *argv[]) ++{ ++ struct knet_crypto_info crypto_list[16]; ++ size_t crypto_list_entries; ++ ++#ifdef KNET_BSD ++ if (is_memcheck() || is_helgrind()) { ++ printf("valgrind-freebsd cannot run this test properly. 
Skipping\n"); ++ return SKIP; ++ } ++#endif ++ ++ if (geteuid() != 0) { ++ printf("This test requires root privileges\n"); ++ return SKIP; ++ } ++ ++ iface_fd = fd_init(); ++ if (iface_fd < 0) { ++ printf("fd_init failed: %s\n", strerror(errno)); ++ return FAIL; ++ } ++ ++ default_mtu = get_iface_mtu(); ++ if (default_mtu < 0) { ++ printf("get_iface_mtu failed: %s\n", strerror(errno)); ++ return FAIL; ++ } ++ ++ memset(crypto_list, 0, sizeof(crypto_list)); ++ ++ if (knet_get_crypto_list(crypto_list, &crypto_list_entries) < 0) { ++ printf("knet_get_crypto_list failed: %s\n", strerror(errno)); ++ return FAIL; ++ } ++ ++ if (crypto_list_entries == 0) { ++ printf("no crypto modules detected. Skipping\n"); ++ return SKIP; ++ } ++ ++ test(crypto_list[0].name, "aes128", "sha1"); ++ test(crypto_list[0].name, "aes128", "sha256"); ++ test(crypto_list[0].name, "aes256", "sha1"); ++ test(crypto_list[0].name, "aes256", "sha256"); ++ ++ exit_local(PASS); ++} +diff --git a/libknet/threads_common.c b/libknet/threads_common.c +index 1f3e1e3..03edfc4 100644 +--- a/libknet/threads_common.c ++++ b/libknet/threads_common.c +@@ -161,7 +161,7 @@ void force_pmtud_run(knet_handle_t knet_h, uint8_t subsystem, uint8_t reset_mtu) + { + if (reset_mtu) { + log_debug(knet_h, subsystem, "PMTUd has been reset to default"); +- knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; ++ knet_h->data_mtu = calc_min_mtu(knet_h); + if (knet_h->pmtud_notify_fn) { + knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data, + knet_h->data_mtu); +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index 1a19806..1dd1788 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -25,16 +25,16 @@ + static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link) + { + int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once; +- uint32_t kernel_mtu; /* record kernel_mtu from 
EMSGSIZE */ +- size_t onwire_len; /* current packet onwire size */ +- size_t overhead_len; /* onwire packet overhead (protocol based) */ +- size_t max_mtu_len; /* max mtu for protocol */ +- size_t data_len; /* how much data we can send in the packet +- * generally would be onwire_len - overhead_len +- * needs to be adjusted for crypto +- */ +- size_t pad_len; /* crypto packet pad size, needs to move into crypto.c callbacks */ +- ssize_t len; /* len of what we were able to sendto onwire */ ++ uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */ ++ size_t onwire_len; /* current packet onwire size */ ++ size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */ ++ size_t max_mtu_len; /* max mtu for protocol */ ++ size_t data_len; /* how much data we can send in the packet ++ * generally would be onwire_len - ipproto_overhead_len ++ * needs to be adjusted for crypto ++ */ ++ size_t app_mtu_len; /* real data that we can send onwire */ ++ ssize_t len; /* len of what we were able to sendto onwire */ + + struct timespec ts; + unsigned long long pong_timeout_adj_tmp; +@@ -45,20 +45,16 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_ + mutex_retry_limit = 0; + failsafe = 0; + +- dst_link->last_bad_mtu = 0; +- + knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id; + + switch (dst_link->dst_addr.ss_family) { + case AF_INET6: + max_mtu_len = KNET_PMTUD_SIZE_V6; +- overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; +- dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len; ++ ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; + break; + case AF_INET: + max_mtu_len = KNET_PMTUD_SIZE_V4; +- overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; +- dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len; ++ ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; + break; + default: + log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, 
unknown protocol"); +@@ -66,6 +62,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_ + break; + } + ++ dst_link->last_bad_mtu = 0; ++ dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len; ++ + /* + * discovery starts from the top because kernel will + * refuse to send packets > current iface mtu. +@@ -92,107 +91,39 @@ restart: + } + + /* +- * unencrypted packet looks like: +- * +- * | ip | protocol | knet_header | unencrypted data | +- * | onwire_len | +- * | overhead_len | +- * | data_len | +- * | app MTU | +- * +- * encrypted packet looks like (not to scale): +- * +- * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | +- * | onwire_len | +- * | overhead_len | +- * | data_len | +- * | app MTU | +- * +- * knet_h->sec_block_size is >= 0 if encryption will pad the data +- * knet_h->sec_salt_size is >= 0 if encryption is enabled +- * knet_h->sec_hash_size is >= 0 if signing is enabled ++ * common to all packets + */ + + /* +- * common to all packets ++ * calculate the application MTU based on current onwire_len minus ipproto_overhead_len + */ +- data_len = onwire_len - overhead_len; +- +- if (knet_h->crypto_instance) { + +-realign: +- if (knet_h->sec_block_size) { ++ app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len); + +- /* +- * drop both salt and hash, that leaves only the crypto data and padding +- * we need to calculate the padding based on the real encrypted data. +- */ +- data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size); +- +- /* +- * if the crypto mechanism requires padding, calculate the padding +- * and add it back to data_len because that's what the crypto layer +- * would do. 
+- */ +- pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size); +- +- /* +- * if are at the boundary, reset padding +- */ +- if (pad_len == knet_h->sec_block_size) { +- pad_len = 0; +- } +- data_len = data_len + pad_len; +- +- /* +- * if our current data_len is higher than max_mtu_len +- * then we need to reduce by padding size (that is our +- * increment / decrement value) +- * +- * this generally happens only on the first PMTUd run +- */ +- while (data_len + overhead_len >= max_mtu_len) { +- data_len = data_len - knet_h->sec_block_size; +- } ++ /* ++ * recalculate onwire len back that might be different based ++ * on data padding from crypto layer. ++ */ + +- /* +- * add both hash and salt size back, similar to padding above, +- * the crypto layer will add them to the data_len +- */ +- data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size); +- } ++ onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len; + +- if (dst_link->last_bad_mtu) { +- if (data_len + overhead_len >= dst_link->last_bad_mtu) { +- /* +- * reduce data_len to something lower than last_bad_mtu, overhead_len +- * and sec_block_size (decrementing step) - 1 (granularity) +- */ +- data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1; +- if (knet_h->sec_block_size) { +- /* +- * make sure that data_len is aligned to the sec_block_size boundary +- */ +- goto realign; +- } +- } +- } ++ /* ++ * calculate the size of what we need to send to sendto(2). ++ * see also onwire.c for packet format explanation. 
++ */ ++ data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE; + +- if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size) + 1) { ++ if (knet_h->crypto_instance) { ++ if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) { + log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)"); + return -1; + } + +- /* +- * recalculate onwire_len based on crypto information +- * and place it in the PMTUd packet info +- */ +- onwire_len = data_len + overhead_len; + knet_h->pmtudbuf->khp_pmtud_size = onwire_len; + + if (crypto_encrypt_and_sign(knet_h, + (const unsigned char *)knet_h->pmtudbuf, +- data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size), ++ data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size), + knet_h->pmtudbuf_crypt, + (ssize_t *)&data_len) < 0) { + log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet"); +@@ -201,11 +132,8 @@ realign: + + outbuf = knet_h->pmtudbuf_crypt; + knet_h->stats_extra.tx_crypt_pmtu_packets++; +- + } else { +- + knet_h->pmtudbuf->khp_pmtud_size = onwire_len; +- + } + + /* link has gone down, aborting pmtud */ +@@ -417,7 +345,7 @@ retry: + /* + * account for IP overhead, knet headers and crypto in PMTU calculation + */ +- dst_link->status.mtu = onwire_len - dst_link->status.proto_overhead; ++ dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len); + pthread_mutex_unlock(&knet_h->pmtud_mutex); + return 0; + } +@@ -437,7 +365,7 @@ retry: + goto restart; + } + +-static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, unsigned int *min_mtu, int force_run) ++static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run) + { + uint8_t saved_valid_pmtud; + unsigned int saved_pmtud; +@@ 
-455,17 +383,22 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, + timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud); + + if (diff_pmtud < interval) { +- *min_mtu = dst_link->status.mtu; + return dst_link->has_valid_mtu; + } + } + ++ /* ++ * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers ++ * ++ * please note that it is not the same as link->proto_overhead that ++ * includes only either UDP or SCTP (at the moment) overhead. ++ */ + switch (dst_link->dst_addr.ss_family) { + case AF_INET6: +- dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size; ++ dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; + break; + case AF_INET: +- dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size; ++ dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; + break; + } + +@@ -486,26 +419,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, + dst_link->has_valid_mtu = 0; + } else { + dst_link->has_valid_mtu = 1; +- switch (dst_link->dst_addr.ss_family) { +- case AF_INET6: +- if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V6) || +- ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V6)) { +- log_debug(knet_h, KNET_SUB_PMTUD, +- "PMTUD detected an IPv6 MTU out of bound value (%u) for host: %u link: %u.", +- dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id); +- dst_link->has_valid_mtu = 0; +- } +- break; +- case AF_INET: +- if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V4) || +- 
((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V4)) { +- log_debug(knet_h, KNET_SUB_PMTUD, +- "PMTUD detected an IPv4 MTU out of bound value (%u) for host: %u link: %u.", +- dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id); +- dst_link->has_valid_mtu = 0; +- } +- break; +- } + if (dst_link->has_valid_mtu) { + if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) { + log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u", +@@ -513,9 +426,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, + } + log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u", + dst_host->host_id, dst_link->link_id, dst_link->status.mtu); +- if (dst_link->status.mtu < *min_mtu) { +- *min_mtu = dst_link->status.mtu; +- } + + /* + * set pmtud_last, if we can, after we are done with the PMTUd process +@@ -541,14 +451,14 @@ void *_handle_pmtud_link_thread(void *data) + struct knet_host *dst_host; + struct knet_link *dst_link; + int link_idx; +- unsigned int min_mtu, have_mtu; ++ unsigned int have_mtu; + unsigned int lower_mtu; + int link_has_mtu; + int force_run = 0; + + set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED); + +- knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; ++ knet_h->data_mtu = calc_min_mtu(knet_h); + + /* preparing pmtu buffer */ + knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION; +@@ -578,7 +488,6 @@ void *_handle_pmtud_link_thread(void *data) + } + + lower_mtu = KNET_PMTUD_SIZE_V4; +- min_mtu = KNET_PMTUD_SIZE_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; + have_mtu = 0; + + for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { +@@ -593,14 +502,14 @@ void *_handle_pmtud_link_thread(void *data) + (dst_link->status.dynconnected != 1))) + continue; + +- link_has_mtu = _handle_check_pmtud(knet_h, 
dst_host, dst_link, &min_mtu, force_run); ++ link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, force_run); + if (errno == EDEADLK) { + goto out_unlock; + } + if (link_has_mtu) { + have_mtu = 1; +- if (min_mtu < lower_mtu) { +- lower_mtu = min_mtu; ++ if (dst_link->status.mtu < lower_mtu) { ++ lower_mtu = dst_link->status.mtu; + } + } + } +commit 499f589404db791d8e68c84c8ba3a857aeea5083 +Author: Fabio M. Di Nitto +Date: Tue Aug 13 06:41:32 2019 +0200 + + [PMTUd] add dynamic pong timeout when using crypto + + problem originally reported by proxmox community, users + observed that under pressure the MTU would flap back and forth + between 2 values due to other node response timeout. + + implement a dynamic timeout multiplier when using crypto that + should solve the problem in a more flexible fashion. + + When a timeout hits, those new logs will show: + + [knet]: [info] host: host: 1 (passive) best link: 0 (pri: 0) + [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 + [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (4) for host 1 link: 0 + [knet]: [info] pmtud: PMTUD link change for host: 1 link: 0 from 469 to 65429 + [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 + [knet]: [info] pmtud: Global data MTU changed to: 65429 + [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 + [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (8) for host 1 link: 0 + [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (16) for host 1 link: 0 + [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (32) for host 1 link: 0 + [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (64) for host 1 link: 0 + [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 + [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 + [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (128) for host 1 
link: 0 + [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 + + and when the latency reduces and it is safe to be more responsive again: + + [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 + [knet]: [debug] pmtud: Decreasing PMTUd response timeout multiplier to (64) for host 1 link: 0 + [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 + + .... + + testing this patch on normal hosts is a bit challenging tho. + + Patch was tested by hardcoding a super low timeout. + and using a long running version of api_knet_send_crypto_test with a short PMTUd setfreq (10 sec). + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/internals.h b/libknet/internals.h +index 31840e4..d1a4757 100644 +--- a/libknet/internals.h ++++ b/libknet/internals.h +@@ -80,6 +80,7 @@ struct knet_link { + uint32_t last_bad_mtu; + uint32_t last_sent_mtu; + uint32_t last_recv_mtu; ++ uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */ + uint8_t has_valid_mtu; + }; + +diff --git a/libknet/links.c b/libknet/links.c +index 03e0af9..f7eccc3 100644 +--- a/libknet/links.c ++++ b/libknet/links.c +@@ -219,6 +219,7 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l + } + } + ++ link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN; + link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT; + link->has_valid_mtu = 0; + link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */ +diff --git a/libknet/links.h b/libknet/links.h +index e14958d..c8ca610 100644 +--- a/libknet/links.h ++++ b/libknet/links.h +@@ -30,6 +30,16 @@ + */ + #define KNET_LINK_PONG_TIMEOUT_LAT_MUL 2 + ++/* ++ * under heavy load with crypto enabled, it takes much ++ * longer time to receive a response from the other node. ++ * ++ * 128 is somewhat arbitrary number but we want to set a limit ++ * and report failures after that. 
++ */ ++#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN 2 ++#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX 128 ++ + int _link_updown(knet_handle_t knet_h, knet_node_id_t node_id, uint8_t link_id, + unsigned int enabled, unsigned int connected); + +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index 1dd1788..d342697 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -36,8 +36,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_ + size_t app_mtu_len; /* real data that we can send onwire */ + ssize_t len; /* len of what we were able to sendto onwire */ + +- struct timespec ts; +- unsigned long long pong_timeout_adj_tmp; ++ struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts; ++ unsigned long long pong_timeout_adj_tmp, timediff; ++ int pmtud_crypto_reduce = 1; + unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf; + + warn_once = 0; +@@ -242,6 +243,15 @@ retry: + return -1; + } + ++ /* ++ * non fatal, we can wait the next round to reduce the ++ * multiplier ++ */ ++ if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) { ++ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); ++ pmtud_crypto_reduce = 0; ++ } ++ + /* + * set PMTUd reply timeout to match pong_timeout on a given link + * +@@ -261,7 +271,7 @@ retry: + /* + * crypto, under pressure, is a royal PITA + */ +- pong_timeout_adj_tmp = dst_link->pong_timeout_adj * 2; ++ pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier; + } else { + pong_timeout_adj_tmp = dst_link->pong_timeout_adj; + } +@@ -295,6 +305,17 @@ retry: + + if (ret) { + if (ret == ETIMEDOUT) { ++ if ((knet_h->crypto_instance) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) { ++ dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2; ++ pmtud_crypto_reduce = 0; ++ log_debug(knet_h, 
KNET_SUB_PMTUD, ++ "Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u", ++ dst_link->pmtud_crypto_timeout_multiplier, ++ dst_host->host_id, ++ dst_link->link_id); ++ pthread_mutex_unlock(&knet_h->pmtud_mutex); ++ goto restart; ++ } + if (!warn_once) { + log_warn(knet_h, KNET_SUB_PMTUD, + "possible MTU misconfiguration detected. " +@@ -323,6 +344,23 @@ retry: + } + } + ++ if ((knet_h->crypto_instance) && (pmtud_crypto_reduce == 1) && ++ (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) { ++ if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) { ++ timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff); ++ if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) { ++ dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2; ++ log_debug(knet_h, KNET_SUB_PMTUD, ++ "Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u", ++ dst_link->pmtud_crypto_timeout_multiplier, ++ dst_host->host_id, ++ dst_link->link_id); ++ } ++ } else { ++ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); ++ } ++ } ++ + if ((dst_link->last_recv_mtu != onwire_len) || (ret)) { + dst_link->last_bad_mtu = onwire_len; + } else { +commit 5f3476849523e9ee486481b429b471a1ab3cac20 +Author: Fabio M. Di Nitto +Date: Thu Jul 18 07:50:37 2019 +0200 + + [handle] make sure that the pmtud buf contains at least knet header size + + Signed-off-by: Fabio M. 
Di Nitto + +diff --git a/libknet/handle.c b/libknet/handle.c +index 4835e99..1fb9c9b 100644 +--- a/libknet/handle.c ++++ b/libknet/handle.c +@@ -234,14 +234,14 @@ static int _init_buffers(knet_handle_t knet_h) + } + memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE); + +- knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6); ++ knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); + if (!knet_h->pmtudbuf) { + savederrno = errno; + log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s", + strerror(savederrno)); + goto exit_fail; + } +- memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6); ++ memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); + + for (i = 0; i < PCKT_FRAG_MAX; i++) { + bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD; +commit 3b3b6d2a7e1fee7eb41c6bacc1005ff90f7dd5cb +Author: Fabio M. Di Nitto +Date: Thu Jul 18 10:23:14 2019 +0200 + + [tests] fix knet_bench coverity errors + + Signed-off-by: Fabio M. 
Di Nitto + +diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c +index dfe5238..dc04239 100644 +--- a/libknet/tests/knet_bench.c ++++ b/libknet/tests/knet_bench.c +@@ -277,22 +277,24 @@ static void setup_knet(int argc, char *argv[]) + printf("Error: -p can only be specified once\n"); + exit(FAIL); + } +- policystr = optarg; +- if (!strcmp(policystr, "active")) { +- policy = KNET_LINK_POLICY_ACTIVE; +- policyfound = 1; +- } +- /* +- * we can't use rr because clangs can't compile +- * an array of 3 strings, one of which is 2 bytes long +- */ +- if (!strcmp(policystr, "round-robin")) { +- policy = KNET_LINK_POLICY_RR; +- policyfound = 1; +- } +- if (!strcmp(policystr, "passive")) { +- policy = KNET_LINK_POLICY_PASSIVE; +- policyfound = 1; ++ if (optarg) { ++ policystr = optarg; ++ if (!strcmp(policystr, "active")) { ++ policy = KNET_LINK_POLICY_ACTIVE; ++ policyfound = 1; ++ } ++ /* ++ * we can't use rr because clangs can't compile ++ * an array of 3 strings, one of which is 2 bytes long ++ */ ++ if (!strcmp(policystr, "round-robin")) { ++ policy = KNET_LINK_POLICY_RR; ++ policyfound = 1; ++ } ++ if (!strcmp(policystr, "passive")) { ++ policy = KNET_LINK_POLICY_PASSIVE; ++ policyfound = 1; ++ } + } + if (!policyfound) { + printf("Error: invalid policy %s specified. -p accepts active|passive|rr\n", policystr); +@@ -304,14 +306,16 @@ static void setup_knet(int argc, char *argv[]) + printf("Error: -P can only be specified once\n"); + exit(FAIL); + } +- protostr = optarg; +- if (!strcmp(protostr, "UDP")) { +- protocol = KNET_TRANSPORT_UDP; +- protofound = 1; +- } +- if (!strcmp(protostr, "SCTP")) { +- protocol = KNET_TRANSPORT_SCTP; +- protofound = 1; ++ if (optarg) { ++ protostr = optarg; ++ if (!strcmp(protostr, "UDP")) { ++ protocol = KNET_TRANSPORT_UDP; ++ protofound = 1; ++ } ++ if (!strcmp(protostr, "SCTP")) { ++ protocol = KNET_TRANSPORT_SCTP; ++ protofound = 1; ++ } + } + if (!protofound) { + printf("Error: invalid protocol %s specified. 
-P accepts udp|sctp\n", policystr); +@@ -380,17 +384,22 @@ static void setup_knet(int argc, char *argv[]) + } + break; + case 'T': +- if (!strcmp("ping", optarg)) { +- test_type = TEST_PING; +- } +- if (!strcmp("ping_data", optarg)) { +- test_type = TEST_PING_AND_DATA; +- } +- if (!strcmp("perf-by-size", optarg)) { +- test_type = TEST_PERF_BY_SIZE; +- } +- if (!strcmp("perf-by-time", optarg)) { +- test_type = TEST_PERF_BY_TIME; ++ if (optarg) { ++ if (!strcmp("ping", optarg)) { ++ test_type = TEST_PING; ++ } ++ if (!strcmp("ping_data", optarg)) { ++ test_type = TEST_PING_AND_DATA; ++ } ++ if (!strcmp("perf-by-size", optarg)) { ++ test_type = TEST_PERF_BY_SIZE; ++ } ++ if (!strcmp("perf-by-time", optarg)) { ++ test_type = TEST_PERF_BY_TIME; ++ } ++ } else { ++ printf("Error: -T requires an option\n"); ++ exit(FAIL); + } + break; + case 'S': +@@ -957,15 +966,14 @@ static void display_stats(int level) + struct knet_link_stats total_link_stats; + knet_node_id_t host_list[KNET_MAX_HOST]; + uint8_t link_list[KNET_MAX_LINK]; +- int res; + unsigned int i,j; + size_t num_hosts, num_links; + +- res = knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)); +- if (res) { ++ if (knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)) < 0) { + perror("[info]: failed to get knet handle stats"); + return; + } ++ + if (compresscfg || cryptocfg) { + printf("\n"); + printf("[stat]: handle stats\n"); +@@ -1005,8 +1013,7 @@ static void display_stats(int level) + + memset(&total_link_stats, 0, sizeof(struct knet_link_stats)); + +- res = knet_host_get_host_list(knet_h, host_list, &num_hosts); +- if (res) { ++ if (knet_host_get_host_list(knet_h, host_list, &num_hosts) < 0) { + perror("[info]: cannot get host list for stats"); + return; + } +@@ -1015,18 +1022,16 @@ static void display_stats(int level) + qsort(host_list, num_hosts, sizeof(uint16_t), node_compare); + + for (j=0; j +Date: Wed Jul 24 08:38:56 2019 +0200 + + [PMTUd] do not double unlock global read lock + 
+ Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index d342697..f884760 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -297,7 +297,11 @@ retry: + return -1; + } + +- if (shutdown_in_progress(knet_h)) { ++ /* ++ * we cannot use shutdown_in_progress in here because ++ * we already hold the read lock ++ */ ++ if (knet_h->fini_in_progress) { + pthread_mutex_unlock(&knet_h->pmtud_mutex); + log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress"); + return -1; +commit 01242c683b18b813a67c13d3fc0546fec34f9f7c +Author: Fabio M. Di Nitto +Date: Mon Sep 9 15:11:25 2019 +0200 + + [pmtud] switch to use async version of dstcache update due to locking context (read vs write) + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index f884760..d10984f 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -481,7 +481,7 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, + } + + if (saved_valid_pmtud != dst_link->has_valid_mtu) { +- _host_dstcache_update_sync(knet_h, dst_host); ++ _host_dstcache_update_async(knet_h, dst_host); + } + + return dst_link->has_valid_mtu; +commit a70f0adf0d4d38ed614bf2eef1a4e66fec2f2c92 +Author: Fabio M. Di Nitto +Date: Fri Sep 13 07:28:55 2019 +0200 + + [tests] fix ip generation boundaries + + https://ci.kronosnet.org/job/knet-build-all-voting/1450/knet-build-all-voting=rhel80z-s390x/console + + and similar, when pid = 255, the secondary IP would hit 256 that is of course invalid. + + Signed-off-by: Fabio M. 
Di Nitto + +diff --git a/libnozzle/tests/test-common.c b/libnozzle/tests/test-common.c +index b36be79..3afd2ec 100644 +--- a/libnozzle/tests/test-common.c ++++ b/libnozzle/tests/test-common.c +@@ -124,7 +124,7 @@ void make_local_ips(char *testipv4_1, char *testipv4_2, char *testipv6_1, char * + pid = (uint8_t *)&mypid; + + for (i = 0; i < sizeof(pid_t); i++) { +- if (pid[i] == 0) { ++ if ((pid[i] == 0) || (pid[i] == 255)) { + pid[i] = 128; + } + } +commit 63567e1e6b6ebb91fe1df43b910d6b9bd78d528f +Author: Fabio M. Di Nitto +Date: Tue Oct 15 11:53:56 2019 +0200 + + [PMTUd] invalidate MTU for a link if the value is lower than minimum + + Under heavy network load and packet loss, calculated MTU can be + too small. In that case we need to invalidate the link mtu, + that would remove the link from the rotation (and traffic) and + would give PMTUd time to get the right MTU in the next round. + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c +index d10984f..ab00b47 100644 +--- a/libknet/threads_pmtud.c ++++ b/libknet/threads_pmtud.c +@@ -460,7 +460,14 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, + } + dst_link->has_valid_mtu = 0; + } else { +- dst_link->has_valid_mtu = 1; ++ if (dst_link->status.mtu < calc_min_mtu(knet_h)) { ++ log_info(knet_h, KNET_SUB_PMTUD, ++ "Invalid MTU detected for host: %u link: %u mtu: %u", ++ dst_host->host_id, dst_link->link_id, dst_link->status.mtu); ++ dst_link->has_valid_mtu = 0; ++ } else { ++ dst_link->has_valid_mtu = 1; ++ } + if (dst_link->has_valid_mtu) { + if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) { + log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u", diff --git a/SOURCES/bz1753517-link-mem-corruption.patch b/SOURCES/bz1753517-link-mem-corruption.patch new file mode 100644 index 0000000..1c6706a --- /dev/null +++ b/SOURCES/bz1753517-link-mem-corruption.patch @@ -0,0 +1,37 @@ +commit 
93f3df56ce1008c362df679b2768edbf2e5a860a +Author: Fabio M. Di Nitto +Date: Thu Sep 19 09:02:44 2019 +0200 + + [links] fix memory corryption of link structure + + the index would overflow the buffer and overwrite data in the link + structure. Depending on what was written the cluster could fall + apart in many ways, from crashing, to hung. + + Fixes: https://github.com/kronosnet/kronosnet/issues/255 + + thanks to the proxmox developers and community for reporting the issue + and for all the help reproducing / debugging the problem. + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/links.c b/libknet/links.c +index 6abbd48..3d52511 100644 +--- a/libknet/links.c ++++ b/libknet/links.c +@@ -62,13 +62,13 @@ int _link_updown(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, + if (connected) { + time(&link->status.stats.last_up_times[link->status.stats.last_up_time_index]); + link->status.stats.up_count++; +- if (++link->status.stats.last_up_time_index > MAX_LINK_EVENTS) { ++ if (++link->status.stats.last_up_time_index >= MAX_LINK_EVENTS) { + link->status.stats.last_up_time_index = 0; + } + } else { + time(&link->status.stats.last_down_times[link->status.stats.last_down_time_index]); + link->status.stats.down_count++; +- if (++link->status.stats.last_down_time_index > MAX_LINK_EVENTS) { ++ if (++link->status.stats.last_down_time_index >= MAX_LINK_EVENTS) { + link->status.stats.last_down_time_index = 0; + } + } diff --git a/SOURCES/bz1754442-link-mem-corruption.patch b/SOURCES/bz1754442-link-mem-corruption.patch deleted file mode 100644 index 1c6706a..0000000 --- a/SOURCES/bz1754442-link-mem-corruption.patch +++ /dev/null @@ -1,37 +0,0 @@ -commit 93f3df56ce1008c362df679b2768edbf2e5a860a -Author: Fabio M. Di Nitto -Date: Thu Sep 19 09:02:44 2019 +0200 - - [links] fix memory corryption of link structure - - the index would overflow the buffer and overwrite data in the link - structure. 
Depending on what was written the cluster could fall - apart in many ways, from crashing, to hung. - - Fixes: https://github.com/kronosnet/kronosnet/issues/255 - - thanks to the proxmox developers and community for reporting the issue - and for all the help reproducing / debugging the problem. - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/links.c b/libknet/links.c -index 6abbd48..3d52511 100644 ---- a/libknet/links.c -+++ b/libknet/links.c -@@ -62,13 +62,13 @@ int _link_updown(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, - if (connected) { - time(&link->status.stats.last_up_times[link->status.stats.last_up_time_index]); - link->status.stats.up_count++; -- if (++link->status.stats.last_up_time_index > MAX_LINK_EVENTS) { -+ if (++link->status.stats.last_up_time_index >= MAX_LINK_EVENTS) { - link->status.stats.last_up_time_index = 0; - } - } else { - time(&link->status.stats.last_down_times[link->status.stats.last_down_time_index]); - link->status.stats.down_count++; -- if (++link->status.stats.last_down_time_index > MAX_LINK_EVENTS) { -+ if (++link->status.stats.last_down_time_index >= MAX_LINK_EVENTS) { - link->status.stats.last_down_time_index = 0; - } - } diff --git a/SOURCES/bz1761711-fix-data-deliver-corruption-from-fragmented-packets.patch b/SOURCES/bz1761711-fix-data-deliver-corruption-from-fragmented-packets.patch new file mode 100644 index 0000000..45861c4 --- /dev/null +++ b/SOURCES/bz1761711-fix-data-deliver-corruption-from-fragmented-packets.patch @@ -0,0 +1,415 @@ +commit db21da87bba6017c8343f9c6f255b21813ffd5d0 +Author: Fabio M. Di Nitto +Date: Tue Oct 15 06:46:36 2019 +0200 + + [host] rename variables to make it easier to read the code + + Signed-off-by: Fabio M. 
Di Nitto + +diff --git a/libknet/host.c b/libknet/host.c +index abb1f89..ac26b89 100644 +--- a/libknet/host.c ++++ b/libknet/host.c +@@ -569,7 +569,7 @@ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num) + + int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, int clear_buf) + { +- size_t i, j; /* circular buffer indexes */ ++ size_t head, tail; /* circular buffer indexes */ + seq_num_t seq_dist; + char *dst_cbuf = host->circular_buffer; + char *dst_cbuf_defrag = host->circular_buffer_defrag; +@@ -585,13 +585,13 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i + seq_dist = *dst_seq_num - seq_num; + } + +- j = seq_num % KNET_CBUFFER_SIZE; ++ head = seq_num % KNET_CBUFFER_SIZE; + + if (seq_dist < KNET_CBUFFER_SIZE) { /* seq num is in ring buffer */ + if (!defrag_buf) { +- return (dst_cbuf[j] == 0) ? 1 : 0; ++ return (dst_cbuf[head] == 0) ? 1 : 0; + } else { +- return (dst_cbuf_defrag[j] == 0) ? 1 : 0; ++ return (dst_cbuf_defrag[head] == 0) ? 
1 : 0; + } + } else if (seq_dist <= SEQ_MAX - KNET_CBUFFER_SIZE) { + memset(dst_cbuf, 0, KNET_CBUFFER_SIZE); +@@ -600,16 +600,16 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i + } + + /* cleaning up circular buffer */ +- i = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE; ++ tail = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE; + +- if (i > j) { +- memset(dst_cbuf + i, 0, KNET_CBUFFER_SIZE - i); +- memset(dst_cbuf, 0, j + 1); +- memset(dst_cbuf_defrag + i, 0, KNET_CBUFFER_SIZE - i); +- memset(dst_cbuf_defrag, 0, j + 1); ++ if (tail > head) { ++ memset(dst_cbuf + tail, 0, KNET_CBUFFER_SIZE - tail); ++ memset(dst_cbuf, 0, head + 1); ++ memset(dst_cbuf_defrag + tail, 0, KNET_CBUFFER_SIZE - tail); ++ memset(dst_cbuf_defrag, 0, head + 1); + } else { +- memset(dst_cbuf + i, 0, j - i + 1); +- memset(dst_cbuf_defrag + i, 0, j - i + 1); ++ memset(dst_cbuf + tail, 0, head - tail + 1); ++ memset(dst_cbuf_defrag + tail, 0, head - tail + 1); + } + + *dst_seq_num = seq_num; +commit 1e473cf26d55c2b6ff8d5bfaa5aa689554de803c +Author: Fabio M. Di Nitto +Date: Tue Oct 15 06:53:24 2019 +0200 + + [host] fix defrag buffers reclaim logic + + The problem: + + - let's assume a 2 nodes (A and B) cluster setup + - node A sends fragmented packets to node B and there is + packet loss on the network. + - node B receives all those fragments and attempts to + reassemble them. + - node A sends packet seq_num X in Y fragments. + - node B receives only part of the fragments and stores + them in a defrag buf. + - packet loss stops. + - node A continues to send packets and a seq_num + roll-over takes place. + - node A sends a new packet seq_num X in Y fragments. + - node B gets confused here because the parts of the old + packet seq_num X are still stored and the buffer + has not been reclaimed. + - node B continues to rebuild packet seq_num X with + old stale data and new data from after the roll-over. 
+ - node B completes reassembling the packet and delivers + junk to the application. + + The solution: + + Add a much stronger buffer reclaim logic that will apply + on each received packet and not only when defrag buffers + are needed, as there might be a mix of fragmented and not + fragmented packets in-flight. + + The new logic creates a window of N packets that can be + handled at the same time (based on the number of buffers) + and clear everything else. + + Fixes https://github.com/kronosnet/kronosnet/issues/261 + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/host.c b/libknet/host.c +index ac26b89..85d4626 100644 +--- a/libknet/host.c ++++ b/libknet/host.c +@@ -562,6 +562,35 @@ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num) + } + } + ++static void _reclaim_old_defrag_bufs(struct knet_host *host, seq_num_t seq_num) ++{ ++ seq_num_t head, tail; /* seq_num boundaries */ ++ int i; ++ ++ head = seq_num + 1; ++ tail = seq_num - (KNET_MAX_LINK + 1); ++ ++ /* ++ * expire old defrag buffers ++ */ ++ for (i = 0; i < KNET_MAX_LINK; i++) { ++ if (host->defrag_buf[i].in_use) { ++ /* ++ * head has done a rollover to 0+ ++ */ ++ if (tail > head) { ++ if ((host->defrag_buf[i].pckt_seq >= head) && (host->defrag_buf[i].pckt_seq <= tail)) { ++ host->defrag_buf[i].in_use = 0; ++ } ++ } else { ++ if ((host->defrag_buf[i].pckt_seq >= head) || (host->defrag_buf[i].pckt_seq <= tail)){ ++ host->defrag_buf[i].in_use = 0; ++ } ++ } ++ } ++ } ++} ++ + /* + * check if a given packet seq num is in the circular buffers + * defrag_buf = 0 -> use normal cbuf 1 -> use the defrag buffer lookup +@@ -579,6 +608,8 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i + _clear_cbuffers(host, seq_num); + } + ++ _reclaim_old_defrag_bufs(host, seq_num); ++ + if (seq_num < *dst_seq_num) { + seq_dist = (SEQ_MAX - seq_num) + *dst_seq_num; + } else { +commit 5bd88ebd63af20577095c2c98975f0f1781ba46a +Author: Fabio M. 
Di Nitto +Date: Tue Oct 15 07:02:05 2019 +0200 + + [rx] copy data into the defrag buffer only if we know the size of the frame + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c +index b2a5dad..6c26cdc 100644 +--- a/libknet/threads_rx.c ++++ b/libknet/threads_rx.c +@@ -186,8 +186,10 @@ static int pckt_defrag(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t + defrag_buf->frag_size = *len; + } + +- memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size), +- inbuf->khp_data_userdata, *len); ++ if (defrag_buf->frag_size) { ++ memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size), ++ inbuf->khp_data_userdata, *len); ++ } + + defrag_buf->frag_recv++; + defrag_buf->frag_map[inbuf->khp_data_frag_seq] = 1; +commit cd59986900510119d8e7b63d33ad35466d480858 +Author: Fabio M. Di Nitto +Date: Tue Oct 15 07:16:22 2019 +0200 + + [test] add ability to knet_bench to specify a fixed packet size for perf test + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c +index dc04239..54b5303 100644 +--- a/libknet/tests/knet_bench.c ++++ b/libknet/tests/knet_bench.c +@@ -67,6 +67,8 @@ static int test_type = TEST_PING; + static uint64_t perf_by_size_size = 1 * ONE_GIGABYTE; + static uint64_t perf_by_time_secs = 10; + ++static uint32_t force_packet_size = 0; ++ + struct node { + int nodeid; + int links; +@@ -109,6 +111,7 @@ static void print_help(void) + printf(" -s nodeid that will generate traffic for benchmarks\n"); + printf(" -S [size|seconds] when used in combination with -T perf-by-size it indicates how many GB of traffic to generate for the test. (default: 1GB)\n"); + printf(" when used in combination with -T perf-by-time it indicates how many Seconds of traffic to generate for the test. 
(default: 10 seconds)\n"); ++ printf(" -x force packet size for perf-by-time or perf-by-size\n"); + printf(" -C repeat the test continously (default: off)\n"); + printf(" -X[XX] show stats at the end of the run (default: 1)\n"); + printf(" 1: show handle stats, 2: show summary link stats\n"); +@@ -250,7 +253,7 @@ static void setup_knet(int argc, char *argv[]) + + memset(nodes, 0, sizeof(nodes)); + +- while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:X::P:z:h")) != EOF) { ++ while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) { + switch(rv) { + case 'h': + print_help(); +@@ -406,6 +409,13 @@ static void setup_knet(int argc, char *argv[]) + perf_by_size_size = (uint64_t)atoi(optarg) * ONE_GIGABYTE; + perf_by_time_secs = (uint64_t)atoi(optarg); + break; ++ case 'x': ++ force_packet_size = (uint32_t)atoi(optarg); ++ if ((force_packet_size < 1) || (force_packet_size > 65536)) { ++ printf("Unsupported packet size %u (accepted 1 - 65536)\n", force_packet_size); ++ exit(FAIL); ++ } ++ break; + case 'C': + continous = 1; + break; +@@ -874,7 +884,7 @@ static int setup_send_buffers_common(struct knet_mmsghdr *msg, struct iovec *iov + printf("TXT: Unable to malloc!\n"); + return -1; + } +- memset(tx_buf[i], 0, KNET_MAX_PACKET_SIZE); ++ memset(tx_buf[i], i, KNET_MAX_PACKET_SIZE); + iov_out[i].iov_base = (void *)tx_buf[i]; + memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); + msg[i].msg_hdr.msg_iov = &iov_out[i]; +@@ -898,6 +908,9 @@ static void send_perf_data_by_size(void) + setup_send_buffers_common(msg, iov_out, tx_buf); + + while (packetsize <= KNET_MAX_PACKET_SIZE) { ++ if (force_packet_size) { ++ packetsize = force_packet_size; ++ } + for (i = 0; i < PCKT_FRAG_MAX; i++) { + iov_out[i].iov_len = packetsize; + } +@@ -926,7 +939,7 @@ static void send_perf_data_by_size(void) + + knet_send(knet_h, ctrl_message, TEST_STOP, channel); + +- if (packetsize == KNET_MAX_PACKET_SIZE) { ++ if ((packetsize == KNET_MAX_PACKET_SIZE) || 
(force_packet_size)) { + break; + } + +@@ -1175,6 +1188,9 @@ static void send_perf_data_by_time(void) + memset(&clock_end, 0, sizeof(clock_start)); + + while (packetsize <= KNET_MAX_PACKET_SIZE) { ++ if (force_packet_size) { ++ packetsize = force_packet_size; ++ } + for (i = 0; i < PCKT_FRAG_MAX; i++) { + iov_out[i].iov_len = packetsize; + } +@@ -1205,7 +1221,7 @@ static void send_perf_data_by_time(void) + + knet_send(knet_h, ctrl_message, TEST_STOP, channel); + +- if (packetsize == KNET_MAX_PACKET_SIZE) { ++ if ((packetsize == KNET_MAX_PACKET_SIZE) || (force_packet_size)) { + break; + } + +commit e28e2ea7c7e8139a6792ec1508215d4560b53e65 +Author: Fabio M. Di Nitto +Date: Wed Oct 16 08:10:23 2019 +0200 + + [test] add packet verification option to knet_bench + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c +index 54b5303..c9e1c06 100644 +--- a/libknet/tests/knet_bench.c ++++ b/libknet/tests/knet_bench.c +@@ -47,6 +47,7 @@ static char *compresscfg = NULL; + static char *cryptocfg = NULL; + static int machine_output = 0; + static int use_access_lists = 0; ++static int use_pckt_verification = 0; + + static int bench_shutdown_in_progress = 0; + static pthread_mutex_t shutdown_mutex = PTHREAD_MUTEX_INITIALIZER; +@@ -76,6 +77,11 @@ struct node { + struct sockaddr_storage address[KNET_MAX_LINK]; + }; + ++struct pckt_ver { ++ uint32_t len; ++ uint32_t chksum; ++}; ++ + static void print_help(void) + { + printf("knet_bench usage:\n"); +@@ -117,6 +123,7 @@ static void print_help(void) + printf(" 1: show handle stats, 2: show summary link stats\n"); + printf(" 3: show detailed link stats\n"); + printf(" -a enable machine parsable output (default: off).\n"); ++ printf(" -v enable packet verification for performance tests (default: off).\n"); + } + + static void parse_nodes(char *nodesinfo[MAX_NODES], int onidx, int port, struct node nodes[MAX_NODES], int *thisidx) +@@ -253,7 +260,7 @@ static void setup_knet(int argc, 
char *argv[]) + + memset(nodes, 0, sizeof(nodes)); + +- while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) { ++ while ((rv = getopt(argc, argv, "aCT:S:s:lvdfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) { + switch(rv) { + case 'h': + print_help(); +@@ -411,11 +418,14 @@ static void setup_knet(int argc, char *argv[]) + break; + case 'x': + force_packet_size = (uint32_t)atoi(optarg); +- if ((force_packet_size < 1) || (force_packet_size > 65536)) { +- printf("Unsupported packet size %u (accepted 1 - 65536)\n", force_packet_size); ++ if ((force_packet_size < 64) || (force_packet_size > 65536)) { ++ printf("Unsupported packet size %u (accepted 64 - 65536)\n", force_packet_size); + exit(FAIL); + } + break; ++ case 'v': ++ use_pckt_verification = 1; ++ break; + case 'C': + continous = 1; + break; +@@ -654,6 +664,24 @@ static void setup_knet(int argc, char *argv[]) + } + } + ++/* ++ * calculate weak chksum (stole from corosync for debugging purposes) ++ */ ++static uint32_t compute_chsum(const unsigned char *data, uint32_t data_len) ++{ ++ unsigned int i; ++ unsigned int checksum = 0; ++ ++ for (i = 0; i < data_len; i++) { ++ if (checksum & 1) { ++ checksum |= 0x10000; ++ } ++ ++ checksum = ((checksum >> 1) + (unsigned char)data[i]) & 0xffff; ++ } ++ return (checksum); ++} ++ + static void *_rx_thread(void *args) + { + int rx_epoll; +@@ -766,6 +794,20 @@ static void *_rx_thread(void *args) + } + continue; + } ++ if (use_pckt_verification) { ++ struct pckt_ver *recv_pckt = (struct pckt_ver *)msg[i].msg_hdr.msg_iov->iov_base; ++ uint32_t chksum; ++ ++ if (msg[i].msg_len != recv_pckt->len) { ++ printf("Wrong packet len received: %u expected: %u!\n", msg[i].msg_len, recv_pckt->len); ++ exit(FAIL); ++ } ++ chksum = compute_chsum((const unsigned char *)msg[i].msg_hdr.msg_iov->iov_base + sizeof(struct pckt_ver), msg[i].msg_len - sizeof(struct pckt_ver)); ++ if (recv_pckt->chksum != chksum){ ++ printf("Wrong packet checksum received: %u expected: %u!\n", 
recv_pckt->chksum, chksum); ++ exit(FAIL); ++ } ++ } + rx_pkts++; + rx_bytes = rx_bytes + msg[i].msg_len; + current_pckt_size = msg[i].msg_len; +@@ -913,6 +955,11 @@ static void send_perf_data_by_size(void) + } + for (i = 0; i < PCKT_FRAG_MAX; i++) { + iov_out[i].iov_len = packetsize; ++ if (use_pckt_verification) { ++ struct pckt_ver *tx_pckt = (struct pckt_ver *)&iov_out[i].iov_base; ++ tx_pckt->len = iov_out[i].iov_len; ++ tx_pckt->chksum = compute_chsum((const unsigned char *)iov_out[i].iov_base + sizeof(struct pckt_ver), iov_out[i].iov_len - sizeof(struct pckt_ver)); ++ } + } + + total_pkts_to_tx = perf_by_size_size / packetsize; +@@ -1193,6 +1240,11 @@ static void send_perf_data_by_time(void) + } + for (i = 0; i < PCKT_FRAG_MAX; i++) { + iov_out[i].iov_len = packetsize; ++ if (use_pckt_verification) { ++ struct pckt_ver *tx_pckt = (struct pckt_ver *)iov_out[i].iov_base; ++ tx_pckt->len = iov_out[i].iov_len; ++ tx_pckt->chksum = compute_chsum((const unsigned char *)iov_out[i].iov_base + sizeof(struct pckt_ver), iov_out[i].iov_len - sizeof(struct pckt_ver)); ++ } + } + printf("[info]: testing with %u bytes packet size for %" PRIu64 " seconds.\n", packetsize, perf_by_time_secs); + diff --git a/SOURCES/bz1763714-fix-data-deliver-corruption-from-fragmented-packets.patch b/SOURCES/bz1763714-fix-data-deliver-corruption-from-fragmented-packets.patch deleted file mode 100644 index 45861c4..0000000 --- a/SOURCES/bz1763714-fix-data-deliver-corruption-from-fragmented-packets.patch +++ /dev/null @@ -1,415 +0,0 @@ -commit db21da87bba6017c8343f9c6f255b21813ffd5d0 -Author: Fabio M. Di Nitto -Date: Tue Oct 15 06:46:36 2019 +0200 - - [host] rename variables to make it easier to read the code - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libknet/host.c b/libknet/host.c -index abb1f89..ac26b89 100644 ---- a/libknet/host.c -+++ b/libknet/host.c -@@ -569,7 +569,7 @@ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num) - - int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, int clear_buf) - { -- size_t i, j; /* circular buffer indexes */ -+ size_t head, tail; /* circular buffer indexes */ - seq_num_t seq_dist; - char *dst_cbuf = host->circular_buffer; - char *dst_cbuf_defrag = host->circular_buffer_defrag; -@@ -585,13 +585,13 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i - seq_dist = *dst_seq_num - seq_num; - } - -- j = seq_num % KNET_CBUFFER_SIZE; -+ head = seq_num % KNET_CBUFFER_SIZE; - - if (seq_dist < KNET_CBUFFER_SIZE) { /* seq num is in ring buffer */ - if (!defrag_buf) { -- return (dst_cbuf[j] == 0) ? 1 : 0; -+ return (dst_cbuf[head] == 0) ? 1 : 0; - } else { -- return (dst_cbuf_defrag[j] == 0) ? 1 : 0; -+ return (dst_cbuf_defrag[head] == 0) ? 
1 : 0; - } - } else if (seq_dist <= SEQ_MAX - KNET_CBUFFER_SIZE) { - memset(dst_cbuf, 0, KNET_CBUFFER_SIZE); -@@ -600,16 +600,16 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i - } - - /* cleaning up circular buffer */ -- i = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE; -+ tail = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE; - -- if (i > j) { -- memset(dst_cbuf + i, 0, KNET_CBUFFER_SIZE - i); -- memset(dst_cbuf, 0, j + 1); -- memset(dst_cbuf_defrag + i, 0, KNET_CBUFFER_SIZE - i); -- memset(dst_cbuf_defrag, 0, j + 1); -+ if (tail > head) { -+ memset(dst_cbuf + tail, 0, KNET_CBUFFER_SIZE - tail); -+ memset(dst_cbuf, 0, head + 1); -+ memset(dst_cbuf_defrag + tail, 0, KNET_CBUFFER_SIZE - tail); -+ memset(dst_cbuf_defrag, 0, head + 1); - } else { -- memset(dst_cbuf + i, 0, j - i + 1); -- memset(dst_cbuf_defrag + i, 0, j - i + 1); -+ memset(dst_cbuf + tail, 0, head - tail + 1); -+ memset(dst_cbuf_defrag + tail, 0, head - tail + 1); - } - - *dst_seq_num = seq_num; -commit 1e473cf26d55c2b6ff8d5bfaa5aa689554de803c -Author: Fabio M. Di Nitto -Date: Tue Oct 15 06:53:24 2019 +0200 - - [host] fix defrag buffers reclaim logic - - The problem: - - - let's assume a 2 nodes (A and B) cluster setup - - node A sends fragmented packets to node B and there is - packet loss on the network. - - node B receives all those fragments and attempts to - reassemble them. - - node A sends packet seq_num X in Y fragments. - - node B receives only part of the fragments and stores - them in a defrag buf. - - packet loss stops. - - node A continues to send packets and a seq_num - roll-over takes place. - - node A sends a new packet seq_num X in Y fragments. - - node B gets confused here because the parts of the old - packet seq_num X are still stored and the buffer - has not been reclaimed. - - node B continues to rebuild packet seq_num X with - old stale data and new data from after the roll-over. 
- - node B completes reassembling the packet and delivers - junk to the application. - - The solution: - - Add a much stronger buffer reclaim logic that will apply - on each received packet and not only when defrag buffers - are needed, as there might be a mix of fragmented and not - fragmented packets in-flight. - - The new logic creates a window of N packets that can be - handled at the same time (based on the number of buffers) - and clear everything else. - - Fixes https://github.com/kronosnet/kronosnet/issues/261 - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/host.c b/libknet/host.c -index ac26b89..85d4626 100644 ---- a/libknet/host.c -+++ b/libknet/host.c -@@ -562,6 +562,35 @@ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num) - } - } - -+static void _reclaim_old_defrag_bufs(struct knet_host *host, seq_num_t seq_num) -+{ -+ seq_num_t head, tail; /* seq_num boundaries */ -+ int i; -+ -+ head = seq_num + 1; -+ tail = seq_num - (KNET_MAX_LINK + 1); -+ -+ /* -+ * expire old defrag buffers -+ */ -+ for (i = 0; i < KNET_MAX_LINK; i++) { -+ if (host->defrag_buf[i].in_use) { -+ /* -+ * head has done a rollover to 0+ -+ */ -+ if (tail > head) { -+ if ((host->defrag_buf[i].pckt_seq >= head) && (host->defrag_buf[i].pckt_seq <= tail)) { -+ host->defrag_buf[i].in_use = 0; -+ } -+ } else { -+ if ((host->defrag_buf[i].pckt_seq >= head) || (host->defrag_buf[i].pckt_seq <= tail)){ -+ host->defrag_buf[i].in_use = 0; -+ } -+ } -+ } -+ } -+} -+ - /* - * check if a given packet seq num is in the circular buffers - * defrag_buf = 0 -> use normal cbuf 1 -> use the defrag buffer lookup -@@ -579,6 +608,8 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i - _clear_cbuffers(host, seq_num); - } - -+ _reclaim_old_defrag_bufs(host, seq_num); -+ - if (seq_num < *dst_seq_num) { - seq_dist = (SEQ_MAX - seq_num) + *dst_seq_num; - } else { -commit 5bd88ebd63af20577095c2c98975f0f1781ba46a -Author: Fabio M. 
Di Nitto -Date: Tue Oct 15 07:02:05 2019 +0200 - - [rx] copy data into the defrag buffer only if we know the size of the frame - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c -index b2a5dad..6c26cdc 100644 ---- a/libknet/threads_rx.c -+++ b/libknet/threads_rx.c -@@ -186,8 +186,10 @@ static int pckt_defrag(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t - defrag_buf->frag_size = *len; - } - -- memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size), -- inbuf->khp_data_userdata, *len); -+ if (defrag_buf->frag_size) { -+ memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size), -+ inbuf->khp_data_userdata, *len); -+ } - - defrag_buf->frag_recv++; - defrag_buf->frag_map[inbuf->khp_data_frag_seq] = 1; -commit cd59986900510119d8e7b63d33ad35466d480858 -Author: Fabio M. Di Nitto -Date: Tue Oct 15 07:16:22 2019 +0200 - - [test] add ability to knet_bench to specify a fixed packet size for perf test - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c -index dc04239..54b5303 100644 ---- a/libknet/tests/knet_bench.c -+++ b/libknet/tests/knet_bench.c -@@ -67,6 +67,8 @@ static int test_type = TEST_PING; - static uint64_t perf_by_size_size = 1 * ONE_GIGABYTE; - static uint64_t perf_by_time_secs = 10; - -+static uint32_t force_packet_size = 0; -+ - struct node { - int nodeid; - int links; -@@ -109,6 +111,7 @@ static void print_help(void) - printf(" -s nodeid that will generate traffic for benchmarks\n"); - printf(" -S [size|seconds] when used in combination with -T perf-by-size it indicates how many GB of traffic to generate for the test. (default: 1GB)\n"); - printf(" when used in combination with -T perf-by-time it indicates how many Seconds of traffic to generate for the test. 
(default: 10 seconds)\n"); -+ printf(" -x force packet size for perf-by-time or perf-by-size\n"); - printf(" -C repeat the test continously (default: off)\n"); - printf(" -X[XX] show stats at the end of the run (default: 1)\n"); - printf(" 1: show handle stats, 2: show summary link stats\n"); -@@ -250,7 +253,7 @@ static void setup_knet(int argc, char *argv[]) - - memset(nodes, 0, sizeof(nodes)); - -- while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:X::P:z:h")) != EOF) { -+ while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) { - switch(rv) { - case 'h': - print_help(); -@@ -406,6 +409,13 @@ static void setup_knet(int argc, char *argv[]) - perf_by_size_size = (uint64_t)atoi(optarg) * ONE_GIGABYTE; - perf_by_time_secs = (uint64_t)atoi(optarg); - break; -+ case 'x': -+ force_packet_size = (uint32_t)atoi(optarg); -+ if ((force_packet_size < 1) || (force_packet_size > 65536)) { -+ printf("Unsupported packet size %u (accepted 1 - 65536)\n", force_packet_size); -+ exit(FAIL); -+ } -+ break; - case 'C': - continous = 1; - break; -@@ -874,7 +884,7 @@ static int setup_send_buffers_common(struct knet_mmsghdr *msg, struct iovec *iov - printf("TXT: Unable to malloc!\n"); - return -1; - } -- memset(tx_buf[i], 0, KNET_MAX_PACKET_SIZE); -+ memset(tx_buf[i], i, KNET_MAX_PACKET_SIZE); - iov_out[i].iov_base = (void *)tx_buf[i]; - memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); - msg[i].msg_hdr.msg_iov = &iov_out[i]; -@@ -898,6 +908,9 @@ static void send_perf_data_by_size(void) - setup_send_buffers_common(msg, iov_out, tx_buf); - - while (packetsize <= KNET_MAX_PACKET_SIZE) { -+ if (force_packet_size) { -+ packetsize = force_packet_size; -+ } - for (i = 0; i < PCKT_FRAG_MAX; i++) { - iov_out[i].iov_len = packetsize; - } -@@ -926,7 +939,7 @@ static void send_perf_data_by_size(void) - - knet_send(knet_h, ctrl_message, TEST_STOP, channel); - -- if (packetsize == KNET_MAX_PACKET_SIZE) { -+ if ((packetsize == KNET_MAX_PACKET_SIZE) || 
(force_packet_size)) { - break; - } - -@@ -1175,6 +1188,9 @@ static void send_perf_data_by_time(void) - memset(&clock_end, 0, sizeof(clock_start)); - - while (packetsize <= KNET_MAX_PACKET_SIZE) { -+ if (force_packet_size) { -+ packetsize = force_packet_size; -+ } - for (i = 0; i < PCKT_FRAG_MAX; i++) { - iov_out[i].iov_len = packetsize; - } -@@ -1205,7 +1221,7 @@ static void send_perf_data_by_time(void) - - knet_send(knet_h, ctrl_message, TEST_STOP, channel); - -- if (packetsize == KNET_MAX_PACKET_SIZE) { -+ if ((packetsize == KNET_MAX_PACKET_SIZE) || (force_packet_size)) { - break; - } - -commit e28e2ea7c7e8139a6792ec1508215d4560b53e65 -Author: Fabio M. Di Nitto -Date: Wed Oct 16 08:10:23 2019 +0200 - - [test] add packet verification option to knet_bench - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c -index 54b5303..c9e1c06 100644 ---- a/libknet/tests/knet_bench.c -+++ b/libknet/tests/knet_bench.c -@@ -47,6 +47,7 @@ static char *compresscfg = NULL; - static char *cryptocfg = NULL; - static int machine_output = 0; - static int use_access_lists = 0; -+static int use_pckt_verification = 0; - - static int bench_shutdown_in_progress = 0; - static pthread_mutex_t shutdown_mutex = PTHREAD_MUTEX_INITIALIZER; -@@ -76,6 +77,11 @@ struct node { - struct sockaddr_storage address[KNET_MAX_LINK]; - }; - -+struct pckt_ver { -+ uint32_t len; -+ uint32_t chksum; -+}; -+ - static void print_help(void) - { - printf("knet_bench usage:\n"); -@@ -117,6 +123,7 @@ static void print_help(void) - printf(" 1: show handle stats, 2: show summary link stats\n"); - printf(" 3: show detailed link stats\n"); - printf(" -a enable machine parsable output (default: off).\n"); -+ printf(" -v enable packet verification for performance tests (default: off).\n"); - } - - static void parse_nodes(char *nodesinfo[MAX_NODES], int onidx, int port, struct node nodes[MAX_NODES], int *thisidx) -@@ -253,7 +260,7 @@ static void setup_knet(int argc, 
char *argv[]) - - memset(nodes, 0, sizeof(nodes)); - -- while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) { -+ while ((rv = getopt(argc, argv, "aCT:S:s:lvdfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) { - switch(rv) { - case 'h': - print_help(); -@@ -411,11 +418,14 @@ static void setup_knet(int argc, char *argv[]) - break; - case 'x': - force_packet_size = (uint32_t)atoi(optarg); -- if ((force_packet_size < 1) || (force_packet_size > 65536)) { -- printf("Unsupported packet size %u (accepted 1 - 65536)\n", force_packet_size); -+ if ((force_packet_size < 64) || (force_packet_size > 65536)) { -+ printf("Unsupported packet size %u (accepted 64 - 65536)\n", force_packet_size); - exit(FAIL); - } - break; -+ case 'v': -+ use_pckt_verification = 1; -+ break; - case 'C': - continous = 1; - break; -@@ -654,6 +664,24 @@ static void setup_knet(int argc, char *argv[]) - } - } - -+/* -+ * calculate weak chksum (stole from corosync for debugging purposes) -+ */ -+static uint32_t compute_chsum(const unsigned char *data, uint32_t data_len) -+{ -+ unsigned int i; -+ unsigned int checksum = 0; -+ -+ for (i = 0; i < data_len; i++) { -+ if (checksum & 1) { -+ checksum |= 0x10000; -+ } -+ -+ checksum = ((checksum >> 1) + (unsigned char)data[i]) & 0xffff; -+ } -+ return (checksum); -+} -+ - static void *_rx_thread(void *args) - { - int rx_epoll; -@@ -766,6 +794,20 @@ static void *_rx_thread(void *args) - } - continue; - } -+ if (use_pckt_verification) { -+ struct pckt_ver *recv_pckt = (struct pckt_ver *)msg[i].msg_hdr.msg_iov->iov_base; -+ uint32_t chksum; -+ -+ if (msg[i].msg_len != recv_pckt->len) { -+ printf("Wrong packet len received: %u expected: %u!\n", msg[i].msg_len, recv_pckt->len); -+ exit(FAIL); -+ } -+ chksum = compute_chsum((const unsigned char *)msg[i].msg_hdr.msg_iov->iov_base + sizeof(struct pckt_ver), msg[i].msg_len - sizeof(struct pckt_ver)); -+ if (recv_pckt->chksum != chksum){ -+ printf("Wrong packet checksum received: %u expected: %u!\n", 
recv_pckt->chksum, chksum); -+ exit(FAIL); -+ } -+ } - rx_pkts++; - rx_bytes = rx_bytes + msg[i].msg_len; - current_pckt_size = msg[i].msg_len; -@@ -913,6 +955,11 @@ static void send_perf_data_by_size(void) - } - for (i = 0; i < PCKT_FRAG_MAX; i++) { - iov_out[i].iov_len = packetsize; -+ if (use_pckt_verification) { -+ struct pckt_ver *tx_pckt = (struct pckt_ver *)&iov_out[i].iov_base; -+ tx_pckt->len = iov_out[i].iov_len; -+ tx_pckt->chksum = compute_chsum((const unsigned char *)iov_out[i].iov_base + sizeof(struct pckt_ver), iov_out[i].iov_len - sizeof(struct pckt_ver)); -+ } - } - - total_pkts_to_tx = perf_by_size_size / packetsize; -@@ -1193,6 +1240,11 @@ static void send_perf_data_by_time(void) - } - for (i = 0; i < PCKT_FRAG_MAX; i++) { - iov_out[i].iov_len = packetsize; -+ if (use_pckt_verification) { -+ struct pckt_ver *tx_pckt = (struct pckt_ver *)iov_out[i].iov_base; -+ tx_pckt->len = iov_out[i].iov_len; -+ tx_pckt->chksum = compute_chsum((const unsigned char *)iov_out[i].iov_base + sizeof(struct pckt_ver), iov_out[i].iov_len - sizeof(struct pckt_ver)); -+ } - } - printf("[info]: testing with %u bytes packet size for %" PRIu64 " seconds.\n", packetsize, perf_by_time_secs); - diff --git a/SOURCES/bz1763715-fix-mtu-calculation.patch b/SOURCES/bz1763715-fix-mtu-calculation.patch deleted file mode 100644 index f4c3fb2..0000000 --- a/SOURCES/bz1763715-fix-mtu-calculation.patch +++ /dev/null @@ -1,1751 +0,0 @@ -commit b67c63101246b400c7512cb1adbc590ac06cb6ee -Author: Fabio M. Di Nitto -Date: Tue Jul 30 11:18:33 2019 +0200 - - [crypto] fix log information - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libknet/crypto.c b/libknet/crypto.c -index 9f05fba..9d6757b 100644 ---- a/libknet/crypto.c -+++ b/libknet/crypto.c -@@ -151,8 +151,6 @@ int crypto_init( - goto out; - } - -- log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size); -- - out: - if (!err) { - knet_h->crypto_instance = new; -@@ -161,6 +159,8 @@ out: - knet_h->sec_hash_size = new->sec_hash_size; - knet_h->sec_salt_size = new->sec_salt_size; - -+ log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size); -+ - if (current) { - if (crypto_modules_cmds[current->model].ops->fini != NULL) { - crypto_modules_cmds[current->model].ops->fini(knet_h, current); -commit a89c2cd6d3863abe0f3ae0165239177a7461ee5e -Author: Fabio M. Di Nitto -Date: Wed Jul 31 14:15:07 2019 +0200 - - [udp] log information about detected kernel MTU - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c -index 53d2ba0..be990bb 100644 ---- a/libknet/transport_udp.c -+++ b/libknet/transport_udp.c -@@ -337,6 +337,7 @@ static int read_errs_from_sock(knet_handle_t knet_h, int sockfd) - break; - } else { - knet_h->kernel_mtu = sock_err->ee_info; -+ log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu); - pthread_mutex_unlock(&knet_h->kmtu_mutex); - } - -commit 650ef6d26e83dd7827b2e913c52a1fac67ea60d4 -Author: Fabio M. Di Nitto -Date: Fri Aug 2 10:43:09 2019 +0200 - - [docs] add knet packet layout - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index 603f595..2cd48f9 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -91,6 +91,28 @@ restart: - failsafe++; - } - -+ /* -+ * unencrypted packet looks like: -+ * -+ * | ip | protocol | knet_header | unencrypted data | -+ * | onwire_len | -+ * | overhead_len | -+ * | data_len | -+ * | app MTU | -+ * -+ * encrypted packet looks like (not to scale): -+ * -+ * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | -+ * | onwire_len | -+ * | overhead_len | -+ * | data_len | -+ * | app MTU | -+ * -+ * knet_h->sec_block_size is >= 0 if encryption will pad the data -+ * knet_h->sec_salt_size is >= 0 if encryption is enabled -+ * knet_h->sec_hash_size is >= 0 if signing is enabled -+ */ -+ - data_len = onwire_len - overhead_len; - - if (knet_h->crypto_instance) { -commit dbed772f0cb9070826eac6524646bd2ea7cce8c0 -Author: Fabio M. Di Nitto -Date: Fri Aug 2 10:44:23 2019 +0200 - - [PMTUd] fix MTU calculation when using crypto and add docs - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index 2cd48f9..1a19806 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -113,29 +113,68 @@ restart: - * knet_h->sec_hash_size is >= 0 if signing is enabled - */ - -+ /* -+ * common to all packets -+ */ - data_len = onwire_len - overhead_len; - - if (knet_h->crypto_instance) { - -+realign: - if (knet_h->sec_block_size) { -+ -+ /* -+ * drop both salt and hash, that leaves only the crypto data and padding -+ * we need to calculate the padding based on the real encrypted data. -+ */ -+ data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size); -+ -+ /* -+ * if the crypto mechanism requires padding, calculate the padding -+ * and add it back to data_len because that's what the crypto layer -+ * would do. 
-+ */ - pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size); -+ -+ /* -+ * if are at the boundary, reset padding -+ */ - if (pad_len == knet_h->sec_block_size) { - pad_len = 0; - } - data_len = data_len + pad_len; -- } - -- data_len = data_len + (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size); -- -- if (knet_h->sec_block_size) { -+ /* -+ * if our current data_len is higher than max_mtu_len -+ * then we need to reduce by padding size (that is our -+ * increment / decrement value) -+ * -+ * this generally happens only on the first PMTUd run -+ */ - while (data_len + overhead_len >= max_mtu_len) { - data_len = data_len - knet_h->sec_block_size; - } -+ -+ /* -+ * add both hash and salt size back, similar to padding above, -+ * the crypto layer will add them to the data_len -+ */ -+ data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size); - } - - if (dst_link->last_bad_mtu) { -- while (data_len + overhead_len >= dst_link->last_bad_mtu) { -- data_len = data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size); -+ if (data_len + overhead_len >= dst_link->last_bad_mtu) { -+ /* -+ * reduce data_len to something lower than last_bad_mtu, overhead_len -+ * and sec_block_size (decrementing step) - 1 (granularity) -+ */ -+ data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1; -+ if (knet_h->sec_block_size) { -+ /* -+ * make sure that data_len is aligned to the sec_block_size boundary -+ */ -+ goto realign; -+ } - } - } - -@@ -144,6 +183,10 @@ restart: - return -1; - } - -+ /* -+ * recalculate onwire_len based on crypto information -+ * and place it in the PMTUd packet info -+ */ - onwire_len = data_len + overhead_len; - knet_h->pmtudbuf->khp_pmtud_size = onwire_len; - -commit a9460c72fafe452b7cb584598aa43a87b44428f0 -Author: Fabio M. 
Di Nitto -Date: Mon Aug 12 16:52:59 2019 +0200 - - [PMTUd] rework the whole math to calculate MTU - - internal changes: - - drop the concept of sec_header_size that was completely wrong - and unnecessary - - bump crypto API to version 3 due to the above change - - clarify the difference between link->proto_overhead and - link->status->proto_overhead. We cannot rename the status - one as it would also change ABI. - - add onwire.c with documentation on the packet format - and what various len(s) mean in context. - - add 3 new functions to calculate MTUs back and forth - and use them around, hopefully with enough clarification - on why things are done in a given way. - - heavily change thread_pmtud.c to use those new facilities. - - fix major calculation issues when using crypto (non-crypto - was not affected by the problem). - - fix checks around to make sure they match the new math. - - fix padding calculation. - - add functional PMTUd crypto test - this test can take several hours (12+) and should be executed - on a controlled environment since it automatically changes - loopback MTU to run tests. - - fix way the lowest MTU is calculated during a PMTUd run - to avoid spurious double notifications. - - drop redundant checks. - - user visible changes: - - Global MTU is now calculated properly when using crypto - and values will be in general bigger than before due - to incorrect padding calculation in the previous implementation. - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libknet/Makefile.am b/libknet/Makefile.am -index d080732..2fa2416 100644 ---- a/libknet/Makefile.am -+++ b/libknet/Makefile.am -@@ -36,6 +36,7 @@ sources = \ - links_acl_loopback.c \ - logging.c \ - netutils.c \ -+ onwire.c \ - threads_common.c \ - threads_dsthandler.c \ - threads_heartbeat.c \ -diff --git a/libknet/crypto.c b/libknet/crypto.c -index 9d6757b..afa4f88 100644 ---- a/libknet/crypto.c -+++ b/libknet/crypto.c -@@ -154,12 +154,14 @@ int crypto_init( - out: - if (!err) { - knet_h->crypto_instance = new; -- knet_h->sec_header_size = new->sec_header_size; - knet_h->sec_block_size = new->sec_block_size; - knet_h->sec_hash_size = new->sec_hash_size; - knet_h->sec_salt_size = new->sec_salt_size; - -- log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size); -+ log_debug(knet_h, KNET_SUB_CRYPTO, "Hash size: %zu salt size: %zu block size: %zu", -+ knet_h->sec_hash_size, -+ knet_h->sec_salt_size, -+ knet_h->sec_block_size); - - if (current) { - if (crypto_modules_cmds[current->model].ops->fini != NULL) { -@@ -195,7 +197,6 @@ void crypto_fini( - crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini(knet_h, knet_h->crypto_instance); - } - free(knet_h->crypto_instance); -- knet_h->sec_header_size = 0; - knet_h->sec_block_size = 0; - knet_h->sec_hash_size = 0; - knet_h->sec_salt_size = 0; -diff --git a/libknet/crypto_model.h b/libknet/crypto_model.h -index 70f6238..b05e49c 100644 ---- a/libknet/crypto_model.h -+++ b/libknet/crypto_model.h -@@ -14,13 +14,12 @@ - struct crypto_instance { - int model; - void *model_instance; -- size_t sec_header_size; - size_t sec_block_size; - size_t sec_hash_size; - size_t sec_salt_size; - }; - --#define KNET_CRYPTO_MODEL_ABI 2 -+#define KNET_CRYPTO_MODEL_ABI 3 - - /* - * see compress_model.h for explanation of the various lib related functions -diff --git a/libknet/crypto_nss.c b/libknet/crypto_nss.c -index 330b40c..c624a47 100644 ---- a/libknet/crypto_nss.c -+++ 
b/libknet/crypto_nss.c -@@ -801,10 +801,7 @@ static int nsscrypto_init( - goto out_err; - } - -- crypto_instance->sec_header_size = 0; -- - if (nsscrypto_instance->crypto_hash_type > 0) { -- crypto_instance->sec_header_size += nsshash_len[nsscrypto_instance->crypto_hash_type]; - crypto_instance->sec_hash_size = nsshash_len[nsscrypto_instance->crypto_hash_type]; - } - -@@ -821,8 +818,6 @@ static int nsscrypto_init( - } - } - -- crypto_instance->sec_header_size += (block_size * 2); -- crypto_instance->sec_header_size += SALT_SIZE; - crypto_instance->sec_salt_size = SALT_SIZE; - crypto_instance->sec_block_size = block_size; - } -diff --git a/libknet/crypto_openssl.c b/libknet/crypto_openssl.c -index 0cbc6f5..6571498 100644 ---- a/libknet/crypto_openssl.c -+++ b/libknet/crypto_openssl.c -@@ -566,11 +566,8 @@ static int opensslcrypto_init( - memmove(opensslcrypto_instance->private_key, knet_handle_crypto_cfg->private_key, knet_handle_crypto_cfg->private_key_len); - opensslcrypto_instance->private_key_len = knet_handle_crypto_cfg->private_key_len; - -- crypto_instance->sec_header_size = 0; -- - if (opensslcrypto_instance->crypto_hash_type) { - crypto_instance->sec_hash_size = EVP_MD_size(opensslcrypto_instance->crypto_hash_type); -- crypto_instance->sec_header_size += crypto_instance->sec_hash_size; - } - - if (opensslcrypto_instance->crypto_cipher_type) { -@@ -578,8 +575,6 @@ static int opensslcrypto_init( - - block_size = EVP_CIPHER_block_size(opensslcrypto_instance->crypto_cipher_type); - -- crypto_instance->sec_header_size += (block_size * 2); -- crypto_instance->sec_header_size += SALT_SIZE; - crypto_instance->sec_salt_size = SALT_SIZE; - crypto_instance->sec_block_size = block_size; - } -diff --git a/libknet/internals.h b/libknet/internals.h -index 3f105a1..31840e4 100644 ---- a/libknet/internals.h -+++ b/libknet/internals.h -@@ -71,7 +71,9 @@ struct knet_link { - uint8_t received_pong; - struct timespec ping_last; - /* used by PMTUD thread as temp per-link 
variables and should always contain the onwire_len value! */ -- uint32_t proto_overhead; -+ uint32_t proto_overhead; /* IP + UDP/SCTP overhead. NOT to be confused -+ with stats.proto_overhead that includes also knet headers -+ and crypto headers */ - struct timespec pmtud_last; - uint32_t last_ping_size; - uint32_t last_good_mtu; -@@ -197,7 +199,6 @@ struct knet_handle { - int pmtud_forcerun; - int pmtud_abort; - struct crypto_instance *crypto_instance; -- size_t sec_header_size; - size_t sec_block_size; - size_t sec_hash_size; - size_t sec_salt_size; -diff --git a/libknet/links.c b/libknet/links.c -index 51ead5a..03e0af9 100644 ---- a/libknet/links.c -+++ b/libknet/links.c -@@ -265,7 +265,32 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l - host->status.reachable = 1; - link->status.mtu = KNET_PMTUD_SIZE_V6; - } else { -- link->status.mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; -+ /* -+ * calculate the minimum MTU that is safe to use, -+ * based on RFCs and that each network device should -+ * be able to support without any troubles -+ */ -+ if (link->dynamic == KNET_LINK_STATIC) { -+ /* -+ * with static link we can be more precise than using -+ * the generic calc_min_mtu() -+ */ -+ switch (link->dst_addr.ss_family) { -+ case AF_INET6: -+ link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead)); -+ break; -+ case AF_INET: -+ link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead)); -+ break; -+ } -+ } else { -+ /* -+ * for dynamic links we start with the minimum MTU -+ * possible and PMTUd will kick in immediately -+ * after connection status is 1 -+ */ -+ link->status.mtu = calc_min_mtu(knet_h); -+ } - link->has_valid_mtu = 1; - } - -diff --git a/libknet/onwire.c b/libknet/onwire.c -new file mode 100644 -index 0000000..143ac4b ---- /dev/null -+++ b/libknet/onwire.c -@@ 
-0,0 +1,127 @@ -+/* -+ * Copyright (C) 2019 Red Hat, Inc. All rights reserved. -+ * -+ * Author: Fabio M. Di Nitto -+ * -+ * This software licensed under LGPL-2.0+ -+ */ -+ -+#include "config.h" -+ -+#include -+#include -+#include -+ -+#include "crypto.h" -+#include "internals.h" -+#include "logging.h" -+#include "common.h" -+#include "transport_udp.h" -+#include "transport_sctp.h" -+ -+/* -+ * unencrypted packet looks like: -+ * -+ * | ip | protocol | knet_header | unencrypted data | -+ * | onwire_len | -+ * | proto_overhead | -+ * | data_len | -+ * | app MTU | -+ * -+ * encrypted packet looks like (not to scale): -+ * -+ * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | -+ * | onwire_len | -+ * | proto_overhead | -+ * | data_len | -+ * | app MTU | -+ * -+ * knet_h->sec_block_size is >= 0 if encryption will pad the data -+ * knet_h->sec_salt_size is >= 0 if encryption is enabled -+ * knet_h->sec_hash_size is >= 0 if signing is enabled -+ */ -+ -+/* -+ * this function takes in the data that we would like to send -+ * and tells us the outgoing onwire data size with crypto and -+ * all the headers adjustment. -+ * calling thread needs to account for protocol overhead. -+ */ -+ -+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen) -+{ -+ size_t outlen = inlen, pad_len = 0; -+ -+ if (knet_h->sec_block_size) { -+ /* -+ * if the crypto mechanism requires padding, calculate the padding -+ * and add it back to outlen because that's what the crypto layer -+ * would do. -+ */ -+ pad_len = knet_h->sec_block_size - (outlen % knet_h->sec_block_size); -+ -+ outlen = outlen + pad_len; -+ } -+ -+ return outlen + knet_h->sec_salt_size + knet_h->sec_hash_size; -+} -+ -+/* -+ * this function takes in the data that we would like to send -+ * and tells us what is the real maximum data we can send -+ * accounting for headers and crypto -+ * calling thread needs to account for protocol overhead. 
-+ */ -+ -+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen) -+{ -+ size_t outlen = inlen, pad_len = 0; -+ -+ if (knet_h->sec_block_size) { -+ /* -+ * drop both salt and hash, that leaves only the crypto data and padding -+ * we need to calculate the padding based on the real encrypted data -+ * that includes the knet_header. -+ */ -+ outlen = outlen - (knet_h->sec_salt_size + knet_h->sec_hash_size); -+ -+ /* -+ * if the crypto mechanism requires padding, calculate the padding -+ * and remove it, to align the data. -+ * NOTE: we need to remove pad_len + 1 because, based on testing, -+ * if we send data that are already aligned to block_size, the -+ * crypto implementations will add another block_size! -+ * so we want to make sure that our data won't add an unnecessary -+ * block_size that we need to remove later. -+ */ -+ pad_len = outlen % knet_h->sec_block_size; -+ -+ outlen = outlen - (pad_len + 1); -+ -+ /* -+ * add both hash and salt size back, similar to padding above, -+ * the crypto layer will add them to the outlen -+ */ -+ outlen = outlen + (knet_h->sec_salt_size + knet_h->sec_hash_size); -+ } -+ -+ /* -+ * drop KNET_HEADER_ALL_SIZE to provide a clean application MTU -+ * and various crypto headers -+ */ -+ outlen = outlen - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size); -+ -+ return outlen; -+} -+ -+/* -+ * set the lowest possible value as failsafe for all links. 
-+ * KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6 -+ * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4 -+ * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD -+ */ -+ -+size_t calc_min_mtu(knet_handle_t knet_h) -+{ -+ return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD)); -+} -diff --git a/libknet/onwire.h b/libknet/onwire.h -index 9815bc3..74d4d09 100644 ---- a/libknet/onwire.h -+++ b/libknet/onwire.h -@@ -120,7 +120,9 @@ struct knet_header_payload_ping { - #define KNET_PMTUD_SIZE_V4 65535 - #define KNET_PMTUD_SIZE_V6 KNET_PMTUD_SIZE_V4 - --/* These two get the protocol-specific overheads added to them */ -+/* -+ * IPv4/IPv6 header size -+ */ - #define KNET_PMTUD_OVERHEAD_V4 20 - #define KNET_PMTUD_OVERHEAD_V6 40 - -@@ -199,4 +201,8 @@ struct knet_header { - #define KNET_HEADER_PMTUD_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud)) - #define KNET_HEADER_DATA_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data)) - -+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen); -+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen); -+size_t calc_min_mtu(knet_handle_t knet_h); -+ - #endif -diff --git a/libknet/tests/Makefile.am b/libknet/tests/Makefile.am -index 3346596..9160780 100644 ---- a/libknet/tests/Makefile.am -+++ b/libknet/tests/Makefile.am -@@ -38,6 +38,12 @@ int_checks = \ - - fun_checks = - -+# checks below need to be executed manually -+# or with a specifi environment -+ -+long_run_checks = \ -+ fun_pmtud_crypto_test -+ - benchmarks = \ - knet_bench_test - -@@ -45,6 +51,7 @@ noinst_PROGRAMS = \ - api_knet_handle_new_limit_test \ - pckt_test \ - $(benchmarks) \ -+ $(long_run_checks) \ - $(check_PROGRAMS) - - noinst_SCRIPTS = \ -@@ -71,6 +78,7 @@ int_links_acl_ip_test_SOURCES = int_links_acl_ip.c \ - ../logging.c \ - ../netutils.c \ - ../threads_common.c \ -+ ../onwire.c \ - ../transports.c \ - ../transport_common.c \ - ../transport_loopback.c \ -@@ -88,4 
+96,9 @@ knet_bench_test_SOURCES = knet_bench.c \ - ../logging.c \ - ../compat.c \ - ../transport_common.c \ -- ../threads_common.c -+ ../threads_common.c \ -+ ../onwire.c -+ -+fun_pmtud_crypto_test_SOURCES = fun_pmtud_crypto.c \ -+ test-common.c \ -+ ../onwire.c -diff --git a/libknet/tests/api_knet_send_crypto.c b/libknet/tests/api_knet_send_crypto.c -index 11de857..5fc5463 100644 ---- a/libknet/tests/api_knet_send_crypto.c -+++ b/libknet/tests/api_knet_send_crypto.c -@@ -67,7 +67,7 @@ static void test(const char *model) - memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg)); - strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1); - strncpy(knet_handle_crypto_cfg.crypto_cipher_type, "aes128", sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1); -- strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha1", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); -+ strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha256", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); - knet_handle_crypto_cfg.private_key_len = 2000; - - if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) { -diff --git a/libknet/tests/fun_pmtud_crypto.c b/libknet/tests/fun_pmtud_crypto.c -new file mode 100644 -index 0000000..91c062c ---- /dev/null -+++ b/libknet/tests/fun_pmtud_crypto.c -@@ -0,0 +1,326 @@ -+/* -+ * Copyright (C) 2019 Red Hat, Inc. All rights reserved. -+ * -+ * Authors: Fabio M. 
Di Nitto -+ * -+ * This software licensed under GPL-2.0+ -+ */ -+ -+#include "config.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "libknet.h" -+ -+#include "compress.h" -+#include "internals.h" -+#include "netutils.h" -+#include "onwire.h" -+#include "test-common.h" -+ -+static int private_data; -+ -+static void sock_notify(void *pvt_data, -+ int datafd, -+ int8_t channel, -+ uint8_t tx_rx, -+ int error, -+ int errorno) -+{ -+ return; -+} -+ -+static int iface_fd = 0; -+static int default_mtu = 0; -+ -+#ifdef KNET_LINUX -+const char *loopback = "lo"; -+#endif -+#ifdef KNET_BSD -+const char *loopback = "lo0"; -+#endif -+ -+static int fd_init(void) -+{ -+#ifdef KNET_LINUX -+ return socket(AF_INET, SOCK_STREAM, 0); -+#endif -+#ifdef KNET_BSD -+ return socket(AF_LOCAL, SOCK_DGRAM, 0); -+#endif -+ return -1; -+} -+ -+static int set_iface_mtu(uint32_t mtu) -+{ -+ int err = 0; -+ struct ifreq ifr; -+ -+ memset(&ifr, 0, sizeof(struct ifreq)); -+ strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1); -+ ifr.ifr_mtu = mtu; -+ -+ err = ioctl(iface_fd, SIOCSIFMTU, &ifr); -+ -+ return err; -+} -+ -+static int get_iface_mtu(void) -+{ -+ int err = 0, savederrno = 0; -+ struct ifreq ifr; -+ -+ memset(&ifr, 0, sizeof(struct ifreq)); -+ strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1); -+ -+ err = ioctl(iface_fd, SIOCGIFMTU, &ifr); -+ if (err) { -+ savederrno = errno; -+ goto out_clean; -+ } -+ -+ err = ifr.ifr_mtu; -+ -+out_clean: -+ errno = savederrno; -+ return err; -+} -+ -+static int exit_local(int code) -+{ -+ set_iface_mtu(default_mtu); -+ close(iface_fd); -+ iface_fd = 0; -+ exit(code); -+} -+ -+static void test_mtu(const char *model, const char *crypto, const char *hash) -+{ -+ knet_handle_t knet_h; -+ int logfds[2]; -+ int datafd = 0; -+ int8_t channel = 0; -+ struct sockaddr_storage lo; -+ struct knet_handle_crypto_cfg knet_handle_crypto_cfg; -+ unsigned int data_mtu, expected_mtu; -+ size_t 
calculated_iface_mtu = 0, detected_iface_mtu = 0; -+ -+ if (make_local_sockaddr(&lo, 0) < 0) { -+ printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno)); -+ exit_local(FAIL); -+ } -+ -+ setup_logpipes(logfds); -+ -+ knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG); -+ -+ flush_logs(logfds[0], stdout); -+ -+ printf("Test knet_send with %s and valid data\n", model); -+ -+ memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg)); -+ strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1); -+ strncpy(knet_handle_crypto_cfg.crypto_cipher_type, crypto, sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1); -+ strncpy(knet_handle_crypto_cfg.crypto_hash_type, hash, sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); -+ knet_handle_crypto_cfg.private_key_len = 2000; -+ -+ if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) { -+ printf("knet_handle_crypto failed with correct config: %s\n", strerror(errno)); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) { -+ printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno)); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ datafd = 0; -+ channel = -1; -+ -+ if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) { -+ printf("knet_handle_add_datafd failed: %s\n", strerror(errno)); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if (knet_host_add(knet_h, 1) < 0) { -+ printf("knet_host_add failed: %s\n", strerror(errno)); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if (knet_link_set_config(knet_h, 1, 0, KNET_TRANSPORT_UDP, &lo, &lo, 0) < 0) { -+ printf("Unable to 
configure link: %s\n", strerror(errno)); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if (knet_link_set_pong_count(knet_h, 1, 0, 1) < 0) { -+ printf("knet_link_set_pong_count failed: %s\n", strerror(errno)); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if (knet_link_set_enable(knet_h, 1, 0, 1) < 0) { -+ printf("knet_link_set_enable failed: %s\n", strerror(errno)); -+ knet_link_clear_config(knet_h, 1, 0); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if (wait_for_host(knet_h, 1, 4, logfds[0], stdout) < 0) { -+ printf("timeout waiting for host to be reachable"); -+ knet_link_set_enable(knet_h, 1, 0, 0); -+ knet_link_clear_config(knet_h, 1, 0); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ flush_logs(logfds[0], stdout); -+ -+ if (knet_handle_pmtud_get(knet_h, &data_mtu) < 0) { -+ printf("knet_handle_pmtud_get failed error: %s\n", strerror(errno)); -+ knet_link_set_enable(knet_h, 1, 0, 0); -+ knet_link_clear_config(knet_h, 1, 0); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ calculated_iface_mtu = calc_data_outlen(knet_h, data_mtu + KNET_HEADER_ALL_SIZE) + 28; -+ detected_iface_mtu = get_iface_mtu(); -+ /* -+ * 28 = 20 IP header + 8 UDP header -+ */ -+ expected_mtu = calc_max_data_outlen(knet_h, detected_iface_mtu - 28); -+ -+ if (expected_mtu != data_mtu) { -+ printf("Wrong MTU detected! 
interface mtu: %zu knet mtu: %u expected mtu: %u\n", detected_iface_mtu, data_mtu, expected_mtu); -+ knet_link_set_enable(knet_h, 1, 0, 0); -+ knet_link_clear_config(knet_h, 1, 0); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ if ((detected_iface_mtu - calculated_iface_mtu) >= knet_h->sec_block_size) { -+ printf("Wrong MTU detected! real iface mtu: %zu calculated: %zu\n", detected_iface_mtu, calculated_iface_mtu); -+ knet_link_set_enable(knet_h, 1, 0, 0); -+ knet_link_clear_config(knet_h, 1, 0); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+ exit_local(FAIL); -+ } -+ -+ knet_link_set_enable(knet_h, 1, 0, 0); -+ knet_link_clear_config(knet_h, 1, 0); -+ knet_host_remove(knet_h, 1); -+ knet_handle_free(knet_h); -+ flush_logs(logfds[0], stdout); -+ close_logpipes(logfds); -+} -+ -+static void test(const char *model, const char *crypto, const char *hash) -+{ -+ int i = 576; -+ int max = 65535; -+ -+ while (i <= max) { -+ printf("Setting interface MTU to: %i\n", i); -+ set_iface_mtu(i); -+ test_mtu(model, crypto, hash); -+ if (i == max) { -+ break; -+ } -+ i = i + 15; -+ if (i > max) { -+ i = max; -+ } -+ } -+} -+ -+int main(int argc, char *argv[]) -+{ -+ struct knet_crypto_info crypto_list[16]; -+ size_t crypto_list_entries; -+ -+#ifdef KNET_BSD -+ if (is_memcheck() || is_helgrind()) { -+ printf("valgrind-freebsd cannot run this test properly. 
Skipping\n"); -+ return SKIP; -+ } -+#endif -+ -+ if (geteuid() != 0) { -+ printf("This test requires root privileges\n"); -+ return SKIP; -+ } -+ -+ iface_fd = fd_init(); -+ if (iface_fd < 0) { -+ printf("fd_init failed: %s\n", strerror(errno)); -+ return FAIL; -+ } -+ -+ default_mtu = get_iface_mtu(); -+ if (default_mtu < 0) { -+ printf("get_iface_mtu failed: %s\n", strerror(errno)); -+ return FAIL; -+ } -+ -+ memset(crypto_list, 0, sizeof(crypto_list)); -+ -+ if (knet_get_crypto_list(crypto_list, &crypto_list_entries) < 0) { -+ printf("knet_get_crypto_list failed: %s\n", strerror(errno)); -+ return FAIL; -+ } -+ -+ if (crypto_list_entries == 0) { -+ printf("no crypto modules detected. Skipping\n"); -+ return SKIP; -+ } -+ -+ test(crypto_list[0].name, "aes128", "sha1"); -+ test(crypto_list[0].name, "aes128", "sha256"); -+ test(crypto_list[0].name, "aes256", "sha1"); -+ test(crypto_list[0].name, "aes256", "sha256"); -+ -+ exit_local(PASS); -+} -diff --git a/libknet/threads_common.c b/libknet/threads_common.c -index 1f3e1e3..03edfc4 100644 ---- a/libknet/threads_common.c -+++ b/libknet/threads_common.c -@@ -161,7 +161,7 @@ void force_pmtud_run(knet_handle_t knet_h, uint8_t subsystem, uint8_t reset_mtu) - { - if (reset_mtu) { - log_debug(knet_h, subsystem, "PMTUd has been reset to default"); -- knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; -+ knet_h->data_mtu = calc_min_mtu(knet_h); - if (knet_h->pmtud_notify_fn) { - knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data, - knet_h->data_mtu); -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index 1a19806..1dd1788 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -25,16 +25,16 @@ - static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link) - { - int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once; -- uint32_t kernel_mtu; /* record kernel_mtu from 
EMSGSIZE */ -- size_t onwire_len; /* current packet onwire size */ -- size_t overhead_len; /* onwire packet overhead (protocol based) */ -- size_t max_mtu_len; /* max mtu for protocol */ -- size_t data_len; /* how much data we can send in the packet -- * generally would be onwire_len - overhead_len -- * needs to be adjusted for crypto -- */ -- size_t pad_len; /* crypto packet pad size, needs to move into crypto.c callbacks */ -- ssize_t len; /* len of what we were able to sendto onwire */ -+ uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */ -+ size_t onwire_len; /* current packet onwire size */ -+ size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */ -+ size_t max_mtu_len; /* max mtu for protocol */ -+ size_t data_len; /* how much data we can send in the packet -+ * generally would be onwire_len - ipproto_overhead_len -+ * needs to be adjusted for crypto -+ */ -+ size_t app_mtu_len; /* real data that we can send onwire */ -+ ssize_t len; /* len of what we were able to sendto onwire */ - - struct timespec ts; - unsigned long long pong_timeout_adj_tmp; -@@ -45,20 +45,16 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_ - mutex_retry_limit = 0; - failsafe = 0; - -- dst_link->last_bad_mtu = 0; -- - knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id; - - switch (dst_link->dst_addr.ss_family) { - case AF_INET6: - max_mtu_len = KNET_PMTUD_SIZE_V6; -- overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; -- dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len; -+ ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; - break; - case AF_INET: - max_mtu_len = KNET_PMTUD_SIZE_V4; -- overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; -- dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len; -+ ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; - break; - default: - log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, 
unknown protocol"); -@@ -66,6 +62,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_ - break; - } - -+ dst_link->last_bad_mtu = 0; -+ dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len; -+ - /* - * discovery starts from the top because kernel will - * refuse to send packets > current iface mtu. -@@ -92,107 +91,39 @@ restart: - } - - /* -- * unencrypted packet looks like: -- * -- * | ip | protocol | knet_header | unencrypted data | -- * | onwire_len | -- * | overhead_len | -- * | data_len | -- * | app MTU | -- * -- * encrypted packet looks like (not to scale): -- * -- * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | -- * | onwire_len | -- * | overhead_len | -- * | data_len | -- * | app MTU | -- * -- * knet_h->sec_block_size is >= 0 if encryption will pad the data -- * knet_h->sec_salt_size is >= 0 if encryption is enabled -- * knet_h->sec_hash_size is >= 0 if signing is enabled -+ * common to all packets - */ - - /* -- * common to all packets -+ * calculate the application MTU based on current onwire_len minus ipproto_overhead_len - */ -- data_len = onwire_len - overhead_len; -- -- if (knet_h->crypto_instance) { - --realign: -- if (knet_h->sec_block_size) { -+ app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len); - -- /* -- * drop both salt and hash, that leaves only the crypto data and padding -- * we need to calculate the padding based on the real encrypted data. -- */ -- data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size); -- -- /* -- * if the crypto mechanism requires padding, calculate the padding -- * and add it back to data_len because that's what the crypto layer -- * would do. 
-- */ -- pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size); -- -- /* -- * if are at the boundary, reset padding -- */ -- if (pad_len == knet_h->sec_block_size) { -- pad_len = 0; -- } -- data_len = data_len + pad_len; -- -- /* -- * if our current data_len is higher than max_mtu_len -- * then we need to reduce by padding size (that is our -- * increment / decrement value) -- * -- * this generally happens only on the first PMTUd run -- */ -- while (data_len + overhead_len >= max_mtu_len) { -- data_len = data_len - knet_h->sec_block_size; -- } -+ /* -+ * recalculate onwire len back that might be different based -+ * on data padding from crypto layer. -+ */ - -- /* -- * add both hash and salt size back, similar to padding above, -- * the crypto layer will add them to the data_len -- */ -- data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size); -- } -+ onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len; - -- if (dst_link->last_bad_mtu) { -- if (data_len + overhead_len >= dst_link->last_bad_mtu) { -- /* -- * reduce data_len to something lower than last_bad_mtu, overhead_len -- * and sec_block_size (decrementing step) - 1 (granularity) -- */ -- data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1; -- if (knet_h->sec_block_size) { -- /* -- * make sure that data_len is aligned to the sec_block_size boundary -- */ -- goto realign; -- } -- } -- } -+ /* -+ * calculate the size of what we need to send to sendto(2). -+ * see also onwire.c for packet format explanation. 
-+ */ -+ data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE; - -- if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size) + 1) { -+ if (knet_h->crypto_instance) { -+ if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) { - log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)"); - return -1; - } - -- /* -- * recalculate onwire_len based on crypto information -- * and place it in the PMTUd packet info -- */ -- onwire_len = data_len + overhead_len; - knet_h->pmtudbuf->khp_pmtud_size = onwire_len; - - if (crypto_encrypt_and_sign(knet_h, - (const unsigned char *)knet_h->pmtudbuf, -- data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size), -+ data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size), - knet_h->pmtudbuf_crypt, - (ssize_t *)&data_len) < 0) { - log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet"); -@@ -201,11 +132,8 @@ realign: - - outbuf = knet_h->pmtudbuf_crypt; - knet_h->stats_extra.tx_crypt_pmtu_packets++; -- - } else { -- - knet_h->pmtudbuf->khp_pmtud_size = onwire_len; -- - } - - /* link has gone down, aborting pmtud */ -@@ -417,7 +345,7 @@ retry: - /* - * account for IP overhead, knet headers and crypto in PMTU calculation - */ -- dst_link->status.mtu = onwire_len - dst_link->status.proto_overhead; -+ dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len); - pthread_mutex_unlock(&knet_h->pmtud_mutex); - return 0; - } -@@ -437,7 +365,7 @@ retry: - goto restart; - } - --static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, unsigned int *min_mtu, int force_run) -+static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run) - { - uint8_t saved_valid_pmtud; - unsigned int saved_pmtud; -@@ 
-455,17 +383,22 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, - timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud); - - if (diff_pmtud < interval) { -- *min_mtu = dst_link->status.mtu; - return dst_link->has_valid_mtu; - } - } - -+ /* -+ * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers -+ * -+ * please note that it is not the same as link->proto_overhead that -+ * includes only either UDP or SCTP (at the moment) overhead. -+ */ - switch (dst_link->dst_addr.ss_family) { - case AF_INET6: -- dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size; -+ dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; - break; - case AF_INET: -- dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size; -+ dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; - break; - } - -@@ -486,26 +419,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, - dst_link->has_valid_mtu = 0; - } else { - dst_link->has_valid_mtu = 1; -- switch (dst_link->dst_addr.ss_family) { -- case AF_INET6: -- if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V6) || -- ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V6)) { -- log_debug(knet_h, KNET_SUB_PMTUD, -- "PMTUD detected an IPv6 MTU out of bound value (%u) for host: %u link: %u.", -- dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id); -- dst_link->has_valid_mtu = 0; -- } -- break; -- case AF_INET: -- if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V4) || -- 
((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V4)) { -- log_debug(knet_h, KNET_SUB_PMTUD, -- "PMTUD detected an IPv4 MTU out of bound value (%u) for host: %u link: %u.", -- dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id); -- dst_link->has_valid_mtu = 0; -- } -- break; -- } - if (dst_link->has_valid_mtu) { - if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) { - log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u", -@@ -513,9 +426,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, - } - log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u", - dst_host->host_id, dst_link->link_id, dst_link->status.mtu); -- if (dst_link->status.mtu < *min_mtu) { -- *min_mtu = dst_link->status.mtu; -- } - - /* - * set pmtud_last, if we can, after we are done with the PMTUd process -@@ -541,14 +451,14 @@ void *_handle_pmtud_link_thread(void *data) - struct knet_host *dst_host; - struct knet_link *dst_link; - int link_idx; -- unsigned int min_mtu, have_mtu; -+ unsigned int have_mtu; - unsigned int lower_mtu; - int link_has_mtu; - int force_run = 0; - - set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED); - -- knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; -+ knet_h->data_mtu = calc_min_mtu(knet_h); - - /* preparing pmtu buffer */ - knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION; -@@ -578,7 +488,6 @@ void *_handle_pmtud_link_thread(void *data) - } - - lower_mtu = KNET_PMTUD_SIZE_V4; -- min_mtu = KNET_PMTUD_SIZE_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size; - have_mtu = 0; - - for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { -@@ -593,14 +502,14 @@ void *_handle_pmtud_link_thread(void *data) - (dst_link->status.dynconnected != 1))) - continue; - -- link_has_mtu = _handle_check_pmtud(knet_h, 
dst_host, dst_link, &min_mtu, force_run); -+ link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, force_run); - if (errno == EDEADLK) { - goto out_unlock; - } - if (link_has_mtu) { - have_mtu = 1; -- if (min_mtu < lower_mtu) { -- lower_mtu = min_mtu; -+ if (dst_link->status.mtu < lower_mtu) { -+ lower_mtu = dst_link->status.mtu; - } - } - } -commit 499f589404db791d8e68c84c8ba3a857aeea5083 -Author: Fabio M. Di Nitto -Date: Tue Aug 13 06:41:32 2019 +0200 - - [PMTUd] add dynamic pong timeout when using crypto - - problem originally reported by proxmox community, users - observed that under pressure the MTU would flap back and forth - between 2 values due to other node response timeout. - - implement a dynamic timeout multiplier when using crypto that - should solve the problem in a more flexible fashion. - - When a timeout hits, those new logs will show: - - [knet]: [info] host: host: 1 (passive) best link: 0 (pri: 0) - [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 - [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (4) for host 1 link: 0 - [knet]: [info] pmtud: PMTUD link change for host: 1 link: 0 from 469 to 65429 - [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 - [knet]: [info] pmtud: Global data MTU changed to: 65429 - [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 - [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (8) for host 1 link: 0 - [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (16) for host 1 link: 0 - [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (32) for host 1 link: 0 - [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (64) for host 1 link: 0 - [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 - [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 - [knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (128) for host 1 
link: 0 - [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 - - and when the latency reduces and it is safe to be more responsive again: - - [knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0 - [knet]: [debug] pmtud: Decreasing PMTUd response timeout multiplier to (64) for host 1 link: 0 - [knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429 - - .... - - testing this patch on normal hosts is a bit challenging tho. - - Patch was tested by hardcoding a super low timeout. - and using a long running version of api_knet_send_crypto_test with a short PMTUd setfreq (10 sec). - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/internals.h b/libknet/internals.h -index 31840e4..d1a4757 100644 ---- a/libknet/internals.h -+++ b/libknet/internals.h -@@ -80,6 +80,7 @@ struct knet_link { - uint32_t last_bad_mtu; - uint32_t last_sent_mtu; - uint32_t last_recv_mtu; -+ uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */ - uint8_t has_valid_mtu; - }; - -diff --git a/libknet/links.c b/libknet/links.c -index 03e0af9..f7eccc3 100644 ---- a/libknet/links.c -+++ b/libknet/links.c -@@ -219,6 +219,7 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l - } - } - -+ link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN; - link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT; - link->has_valid_mtu = 0; - link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */ -diff --git a/libknet/links.h b/libknet/links.h -index e14958d..c8ca610 100644 ---- a/libknet/links.h -+++ b/libknet/links.h -@@ -30,6 +30,16 @@ - */ - #define KNET_LINK_PONG_TIMEOUT_LAT_MUL 2 - -+/* -+ * under heavy load with crypto enabled, it takes much -+ * longer time to receive a response from the other node. -+ * -+ * 128 is somewhat arbitrary number but we want to set a limit -+ * and report failures after that. 
-+ */ -+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN 2 -+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX 128 -+ - int _link_updown(knet_handle_t knet_h, knet_node_id_t node_id, uint8_t link_id, - unsigned int enabled, unsigned int connected); - -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index 1dd1788..d342697 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -36,8 +36,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_ - size_t app_mtu_len; /* real data that we can send onwire */ - ssize_t len; /* len of what we were able to sendto onwire */ - -- struct timespec ts; -- unsigned long long pong_timeout_adj_tmp; -+ struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts; -+ unsigned long long pong_timeout_adj_tmp, timediff; -+ int pmtud_crypto_reduce = 1; - unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf; - - warn_once = 0; -@@ -242,6 +243,15 @@ retry: - return -1; - } - -+ /* -+ * non fatal, we can wait the next round to reduce the -+ * multiplier -+ */ -+ if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) { -+ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); -+ pmtud_crypto_reduce = 0; -+ } -+ - /* - * set PMTUd reply timeout to match pong_timeout on a given link - * -@@ -261,7 +271,7 @@ retry: - /* - * crypto, under pressure, is a royal PITA - */ -- pong_timeout_adj_tmp = dst_link->pong_timeout_adj * 2; -+ pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier; - } else { - pong_timeout_adj_tmp = dst_link->pong_timeout_adj; - } -@@ -295,6 +305,17 @@ retry: - - if (ret) { - if (ret == ETIMEDOUT) { -+ if ((knet_h->crypto_instance) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) { -+ dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2; -+ pmtud_crypto_reduce = 0; -+ log_debug(knet_h, 
KNET_SUB_PMTUD, -+ "Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u", -+ dst_link->pmtud_crypto_timeout_multiplier, -+ dst_host->host_id, -+ dst_link->link_id); -+ pthread_mutex_unlock(&knet_h->pmtud_mutex); -+ goto restart; -+ } - if (!warn_once) { - log_warn(knet_h, KNET_SUB_PMTUD, - "possible MTU misconfiguration detected. " -@@ -323,6 +344,23 @@ retry: - } - } - -+ if ((knet_h->crypto_instance) && (pmtud_crypto_reduce == 1) && -+ (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) { -+ if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) { -+ timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff); -+ if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) { -+ dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2; -+ log_debug(knet_h, KNET_SUB_PMTUD, -+ "Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u", -+ dst_link->pmtud_crypto_timeout_multiplier, -+ dst_host->host_id, -+ dst_link->link_id); -+ } -+ } else { -+ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); -+ } -+ } -+ - if ((dst_link->last_recv_mtu != onwire_len) || (ret)) { - dst_link->last_bad_mtu = onwire_len; - } else { -commit 5f3476849523e9ee486481b429b471a1ab3cac20 -Author: Fabio M. Di Nitto -Date: Thu Jul 18 07:50:37 2019 +0200 - - [handle] make sure that the pmtud buf contains at least knet header size - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libknet/handle.c b/libknet/handle.c -index 4835e99..1fb9c9b 100644 ---- a/libknet/handle.c -+++ b/libknet/handle.c -@@ -234,14 +234,14 @@ static int _init_buffers(knet_handle_t knet_h) - } - memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE); - -- knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6); -+ knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); - if (!knet_h->pmtudbuf) { - savederrno = errno; - log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s", - strerror(savederrno)); - goto exit_fail; - } -- memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6); -+ memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); - - for (i = 0; i < PCKT_FRAG_MAX; i++) { - bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD; -commit 3b3b6d2a7e1fee7eb41c6bacc1005ff90f7dd5cb -Author: Fabio M. Di Nitto -Date: Thu Jul 18 10:23:14 2019 +0200 - - [tests] fix knet_bench coverity errors - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c -index dfe5238..dc04239 100644 ---- a/libknet/tests/knet_bench.c -+++ b/libknet/tests/knet_bench.c -@@ -277,22 +277,24 @@ static void setup_knet(int argc, char *argv[]) - printf("Error: -p can only be specified once\n"); - exit(FAIL); - } -- policystr = optarg; -- if (!strcmp(policystr, "active")) { -- policy = KNET_LINK_POLICY_ACTIVE; -- policyfound = 1; -- } -- /* -- * we can't use rr because clangs can't compile -- * an array of 3 strings, one of which is 2 bytes long -- */ -- if (!strcmp(policystr, "round-robin")) { -- policy = KNET_LINK_POLICY_RR; -- policyfound = 1; -- } -- if (!strcmp(policystr, "passive")) { -- policy = KNET_LINK_POLICY_PASSIVE; -- policyfound = 1; -+ if (optarg) { -+ policystr = optarg; -+ if (!strcmp(policystr, "active")) { -+ policy = KNET_LINK_POLICY_ACTIVE; -+ policyfound = 1; -+ } -+ /* -+ * we can't use rr because clangs can't compile -+ * an array of 3 strings, one of which is 2 bytes long -+ */ -+ if (!strcmp(policystr, "round-robin")) { -+ policy = KNET_LINK_POLICY_RR; -+ policyfound = 1; -+ } -+ if (!strcmp(policystr, "passive")) { -+ policy = KNET_LINK_POLICY_PASSIVE; -+ policyfound = 1; -+ } - } - if (!policyfound) { - printf("Error: invalid policy %s specified. -p accepts active|passive|rr\n", policystr); -@@ -304,14 +306,16 @@ static void setup_knet(int argc, char *argv[]) - printf("Error: -P can only be specified once\n"); - exit(FAIL); - } -- protostr = optarg; -- if (!strcmp(protostr, "UDP")) { -- protocol = KNET_TRANSPORT_UDP; -- protofound = 1; -- } -- if (!strcmp(protostr, "SCTP")) { -- protocol = KNET_TRANSPORT_SCTP; -- protofound = 1; -+ if (optarg) { -+ protostr = optarg; -+ if (!strcmp(protostr, "UDP")) { -+ protocol = KNET_TRANSPORT_UDP; -+ protofound = 1; -+ } -+ if (!strcmp(protostr, "SCTP")) { -+ protocol = KNET_TRANSPORT_SCTP; -+ protofound = 1; -+ } - } - if (!protofound) { - printf("Error: invalid protocol %s specified. 
-P accepts udp|sctp\n", policystr); -@@ -380,17 +384,22 @@ static void setup_knet(int argc, char *argv[]) - } - break; - case 'T': -- if (!strcmp("ping", optarg)) { -- test_type = TEST_PING; -- } -- if (!strcmp("ping_data", optarg)) { -- test_type = TEST_PING_AND_DATA; -- } -- if (!strcmp("perf-by-size", optarg)) { -- test_type = TEST_PERF_BY_SIZE; -- } -- if (!strcmp("perf-by-time", optarg)) { -- test_type = TEST_PERF_BY_TIME; -+ if (optarg) { -+ if (!strcmp("ping", optarg)) { -+ test_type = TEST_PING; -+ } -+ if (!strcmp("ping_data", optarg)) { -+ test_type = TEST_PING_AND_DATA; -+ } -+ if (!strcmp("perf-by-size", optarg)) { -+ test_type = TEST_PERF_BY_SIZE; -+ } -+ if (!strcmp("perf-by-time", optarg)) { -+ test_type = TEST_PERF_BY_TIME; -+ } -+ } else { -+ printf("Error: -T requires an option\n"); -+ exit(FAIL); - } - break; - case 'S': -@@ -957,15 +966,14 @@ static void display_stats(int level) - struct knet_link_stats total_link_stats; - knet_node_id_t host_list[KNET_MAX_HOST]; - uint8_t link_list[KNET_MAX_LINK]; -- int res; - unsigned int i,j; - size_t num_hosts, num_links; - -- res = knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)); -- if (res) { -+ if (knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)) < 0) { - perror("[info]: failed to get knet handle stats"); - return; - } -+ - if (compresscfg || cryptocfg) { - printf("\n"); - printf("[stat]: handle stats\n"); -@@ -1005,8 +1013,7 @@ static void display_stats(int level) - - memset(&total_link_stats, 0, sizeof(struct knet_link_stats)); - -- res = knet_host_get_host_list(knet_h, host_list, &num_hosts); -- if (res) { -+ if (knet_host_get_host_list(knet_h, host_list, &num_hosts) < 0) { - perror("[info]: cannot get host list for stats"); - return; - } -@@ -1015,18 +1022,16 @@ static void display_stats(int level) - qsort(host_list, num_hosts, sizeof(uint16_t), node_compare); - - for (j=0; j -Date: Wed Jul 24 08:38:56 2019 +0200 - - [PMTUd] do not double unlock global read lock - 
- Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index d342697..f884760 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -297,7 +297,11 @@ retry: - return -1; - } - -- if (shutdown_in_progress(knet_h)) { -+ /* -+ * we cannot use shutdown_in_progress in here because -+ * we already hold the read lock -+ */ -+ if (knet_h->fini_in_progress) { - pthread_mutex_unlock(&knet_h->pmtud_mutex); - log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress"); - return -1; -commit 01242c683b18b813a67c13d3fc0546fec34f9f7c -Author: Fabio M. Di Nitto -Date: Mon Sep 9 15:11:25 2019 +0200 - - [pmtud] switch to use async version of dstcache update due to locking context (read vs write) - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index f884760..d10984f 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -481,7 +481,7 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, - } - - if (saved_valid_pmtud != dst_link->has_valid_mtu) { -- _host_dstcache_update_sync(knet_h, dst_host); -+ _host_dstcache_update_async(knet_h, dst_host); - } - - return dst_link->has_valid_mtu; -commit a70f0adf0d4d38ed614bf2eef1a4e66fec2f2c92 -Author: Fabio M. Di Nitto -Date: Fri Sep 13 07:28:55 2019 +0200 - - [tests] fix ip generation boundaries - - https://ci.kronosnet.org/job/knet-build-all-voting/1450/knet-build-all-voting=rhel80z-s390x/console - - and similar, when pid = 255, the secondary IP would hit 256 that is of course invalid. - - Signed-off-by: Fabio M. 
Di Nitto - -diff --git a/libnozzle/tests/test-common.c b/libnozzle/tests/test-common.c -index b36be79..3afd2ec 100644 ---- a/libnozzle/tests/test-common.c -+++ b/libnozzle/tests/test-common.c -@@ -124,7 +124,7 @@ void make_local_ips(char *testipv4_1, char *testipv4_2, char *testipv6_1, char * - pid = (uint8_t *)&mypid; - - for (i = 0; i < sizeof(pid_t); i++) { -- if (pid[i] == 0) { -+ if ((pid[i] == 0) || (pid[i] == 255)) { - pid[i] = 128; - } - } -commit 63567e1e6b6ebb91fe1df43b910d6b9bd78d528f -Author: Fabio M. Di Nitto -Date: Tue Oct 15 11:53:56 2019 +0200 - - [PMTUd] invalidate MTU for a link if the value is lower than minimum - - Under heavy network load and packet loss, calculated MTU can be - too small. In that case we need to invalidate the link mtu, - that would remove the link from the rotation (and traffic) and - would give PMTUd time to get the right MTU in the next round. - - Signed-off-by: Fabio M. Di Nitto - -diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c -index d10984f..ab00b47 100644 ---- a/libknet/threads_pmtud.c -+++ b/libknet/threads_pmtud.c -@@ -460,7 +460,14 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, - } - dst_link->has_valid_mtu = 0; - } else { -- dst_link->has_valid_mtu = 1; -+ if (dst_link->status.mtu < calc_min_mtu(knet_h)) { -+ log_info(knet_h, KNET_SUB_PMTUD, -+ "Invalid MTU detected for host: %u link: %u mtu: %u", -+ dst_host->host_id, dst_link->link_id, dst_link->status.mtu); -+ dst_link->has_valid_mtu = 0; -+ } else { -+ dst_link->has_valid_mtu = 1; -+ } - if (dst_link->has_valid_mtu) { - if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) { - log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u", diff --git a/SPECS/kronosnet.spec b/SPECS/kronosnet.spec index 48eef4b..79cd799 100644 --- a/SPECS/kronosnet.spec +++ b/SPECS/kronosnet.spec @@ -83,15 +83,15 @@ Name: kronosnet Summary: Multipoint-to-Multipoint VPN daemon Version: 1.10 
-Release: 1%{?dist}.2 +Release: 4%{?dist} License: GPLv2+ and LGPLv2+ URL: http://www.kronosnet.org Source0: http://www.kronosnet.org/releases/kronosnet-%{version}.tar.gz #Patch0: bz1688880-covscan-fixes.patch -Patch1: bz1754442-link-mem-corruption.patch -Patch2: bz1763715-fix-mtu-calculation.patch -Patch3: bz1763714-fix-data-deliver-corruption-from-fragmented-packets.patch +Patch1: bz1753517-link-mem-corruption.patch +Patch2: bz1736872-fix-mtu-calculation.patch +Patch3: bz1761711-fix-data-deliver-corruption-from-fragmented-packets.patch # Build dependencies BuildRequires: gcc @@ -141,9 +141,9 @@ BuildRequires: autoconf %prep %setup -q -n %{name}-%{version} #%patch0 -p1 -b .bz1688880-covscan-fixes -%patch1 -p1 -b .bz1754442-link-mem-corruption -%patch2 -p1 -b .bz1763715-fix-mtu-calculation -%patch3 -p1 -b .bz1763714-fix-data-deliver-corruption-from-fragmented-packets +%patch1 -p1 -b .bz1753517-link-mem-corruption +%patch2 -p1 -b .bz1736872-fix-mtu-calculation +%patch3 -p1 -b .bz1761711-fix-data-deliver-corruption-from-fragmented-packets %build %if %{defined buildautogen} @@ -534,17 +534,20 @@ Requires: libnozzle1%{_isa} = %{version}-%{release} %endif %changelog -* Tue Oct 22 2019 Fabio M. Di Nitto - 1.10-1.2 +* Thu Oct 17 2019 Fabio M. Di Nitto - 1.10-4 + Disable fun_pmtud_crypto_test as it can take several hours to run + Resolves: rhbz#1736872 + +* Wed Oct 16 2019 Fabio M. Di Nitto - 1.10-3 + PMTUd: Fix MTU calculation when using crypto + Resolves: rhbz#1736872 host: Fix defrag buffer reclaim logic that could cause delivery of corrupted data - Resolves: rhbz#1763714 - PMTUd: Fix MTU calculation when using crypto - Disable fun_pmtud_crypto_test as it can take several hours to run - Resolves: rhbz#1763715 + Resolves: rhbz#1761711 -* Mon Sep 23 2019 Fabio M. Di Nitto - 1.10-1.1 +* Wed Oct 16 2019 Fabio M.
Di Nitto - 1.10-2 link: Fix memory corruption when too many up/down events are recorded - Resolves: rhbz#1754442 + Resolves: rhbz#1753517 * Wed Jun 12 2019 Christine Caulfield - 1.10-1 Rebase to 1.10 for ACL support