Blame SOURCES/0016-tcp-Clamp-MSS-value-when-queueing-data-to-tap-also-f.patch

f07426
From 96bf75e8ebffe06d33b5dee20c44e16ce53ea663 Mon Sep 17 00:00:00 2001
f07426
From: Stefano Brivio <sbrivio@redhat.com>
f07426
Date: Wed, 8 Mar 2023 18:07:42 +0100
f07426
Subject: [PATCH 16/20] tcp: Clamp MSS value when queueing data to tap, also
f07426
 for pasta
f07426
f07426
Tom reports that a pattern of repated ~1 MiB chunks downloads over
f07426
NNTP over TLS, on Podman 4.4 using pasta as network back-end, results
f07426
in pasta taking one full CPU thread after a while, and the download
f07426
never succeeds.
f07426
f07426
On that setup, we end up re-sending the same frame over and over,
f07426
with a consistent 65 534 bytes size, and never get an
f07426
acknowledgement from the tap-side client. This only happens for the
f07426
default MTU value (65 520 bytes) or for values that are slightly
f07426
smaller than that (down to 64 499 bytes).
f07426
f07426
We hit this condition because the MSS value we use in
f07426
tcp_data_from_sock(), only in pasta mode, is simply clamped to
f07426
USHRT_MAX, and not to the actual size of the buffers we pre-cooked
f07426
for sending, which is a bit less than that.
f07426
f07426
It looks like we got away with it until commit 0fb7b2b9080a ("tap:
f07426
Use different io vector bases depending on tap type") fixed the
f07426
setting of iov_len.
f07426
f07426
Luckily, since it's pasta, we're queueing up to two frames at a time,
f07426
so the worst that can happen is a badly segmented TCP stream: we
f07426
always have some space at the tail of the buffer.
f07426
f07426
Clamp the MSS value to the appropriate maximum given by struct
f07426
tcp{4,6}_buf_data_t, no matter if we're running in pasta or passt
f07426
mode.
f07426
f07426
While at it, fix the comments to those structs to reflect the current
f07426
struct size. This is not really relevant for any further calculation
f07426
or consideration, but it's convenient to know while debugging this
f07426
kind of issues.
f07426
f07426
Thanks to Tom for reporting the issue in a very detailed way and for
f07426
providing a test setup.
f07426
f07426
Reported-by: Tom Mombourquette <tom@devnode.com>
f07426
Link: https://github.com/containers/podman/issues/17703
f07426
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
f07426
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
f07426
(cherry picked from commit d7272f1df89c099a7e98ae43d1ef9b936c7e46f7)
f07426
---
f07426
 tcp.c | 23 +++++++++--------------
f07426
 1 file changed, 9 insertions(+), 14 deletions(-)
f07426
f07426
diff --git a/tcp.c b/tcp.c
f07426
index 001fa50..1355b0e 100644
f07426
--- a/tcp.c
f07426
+++ b/tcp.c
f07426
@@ -460,7 +460,7 @@ static struct tcp4_l2_buf_t {
f07426
 	struct iphdr iph;	/* 44				28 */
f07426
 	struct tcphdr th;	/* 64				48 */
f07426
 	uint8_t data[MSS4];	/* 84				68 */
f07426
-				/* 65541			65525 */
f07426
+				/* 65536			65532 */
f07426
 #ifdef __AVX2__
f07426
 } __attribute__ ((packed, aligned(32)))
f07426
 #else
f07426
@@ -488,7 +488,7 @@ struct tcp6_l2_buf_t {
f07426
 	struct ipv6hdr ip6h;	/* 32				20 */
f07426
 	struct tcphdr th;	/* 72				60 */
f07426
 	uint8_t data[MSS6];	/* 92				80 */
f07426
-				/* 65639			65627 */
f07426
+				/* 65536			65532 */
f07426
 #ifdef __AVX2__
f07426
 } __attribute__ ((packed, aligned(32)))
f07426
 #else
f07426
@@ -1913,15 +1913,13 @@ int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
f07426
 
f07426
 /**
f07426
  * tcp_conn_tap_mss() - Get MSS value advertised by tap/guest
f07426
- * @c:		Execution context
f07426
  * @conn:	Connection pointer
f07426
  * @opts:	Pointer to start of TCP options
f07426
  * @optlen:	Bytes in options: caller MUST ensure available length
f07426
  *
f07426
  * Return: clamped MSS value
f07426
  */
f07426
-static uint16_t tcp_conn_tap_mss(const struct ctx *c,
f07426
-				 const struct tcp_tap_conn *conn,
f07426
+static uint16_t tcp_conn_tap_mss(const struct tcp_tap_conn *conn,
f07426
 				 const char *opts, size_t optlen)
f07426
 {
f07426
 	unsigned int mss;
f07426
@@ -1932,13 +1930,10 @@ static uint16_t tcp_conn_tap_mss(const struct ctx *c,
f07426
 	else
f07426
 		mss = ret;
f07426
 
f07426
-	/* Don't upset qemu */
f07426
-	if (c->mode == MODE_PASST) {
f07426
-		if (CONN_V4(conn))
f07426
-			mss = MIN(MSS4, mss);
f07426
-		else
f07426
-			mss = MIN(MSS6, mss);
f07426
-	}
f07426
+	if (CONN_V4(conn))
f07426
+		mss = MIN(MSS4, mss);
f07426
+	else
f07426
+		mss = MIN(MSS6, mss);
f07426
 
f07426
 	return MIN(mss, USHRT_MAX);
f07426
 }
f07426
@@ -2007,7 +2002,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
f07426
 
f07426
 	conn->wnd_to_tap = WINDOW_DEFAULT;
f07426
 
f07426
-	mss = tcp_conn_tap_mss(c, conn, opts, optlen);
f07426
+	mss = tcp_conn_tap_mss(conn, opts, optlen);
f07426
 	if (setsockopt(s, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)))
f07426
 		trace("TCP: failed to set TCP_MAXSEG on socket %i", s);
f07426
 	MSS_SET(conn, mss);
f07426
@@ -2469,7 +2464,7 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
f07426
 	if (!(conn->wnd_from_tap >>= conn->ws_from_tap))
f07426
 		conn->wnd_from_tap = 1;
f07426
 
f07426
-	MSS_SET(conn, tcp_conn_tap_mss(c, conn, opts, optlen));
f07426
+	MSS_SET(conn, tcp_conn_tap_mss(conn, opts, optlen));
f07426
 
f07426
 	conn->seq_init_from_tap = ntohl(th->seq) + 1;
f07426
 	conn->seq_from_tap = conn->seq_init_from_tap;
f07426
-- 
f07426
2.39.2
f07426