Blame SOURCES/0071-nft-Set-socket-receive-buffer.patch

1dc35b
From 77b8d7784082889a66d689aac1972154ef46da08 Mon Sep 17 00:00:00 2001
1dc35b
From: Phil Sutter <phil@nwl.cc>
1dc35b
Date: Tue, 2 Jul 2019 20:30:49 +0200
1dc35b
Subject: [PATCH] nft: Set socket receive buffer
1dc35b
1dc35b
When trying to delete user-defined chains in a large ruleset,
1dc35b
iptables-nft aborts with "No buffer space available". This can be
1dc35b
reproduced using the following script:
1dc35b
1dc35b
| #! /bin/bash
1dc35b
| iptables-nft-restore <(
1dc35b
|
1dc35b
| echo "*filter"
1dc35b
| for i in $(seq 0 200000);do
1dc35b
|         printf ":chain_%06x - [0:0]\n" $i
1dc35b
| done
1dc35b
| for i in $(seq 0 200000);do
1dc35b
|         printf -- "-A INPUT -j chain_%06x\n" $i
1dc35b
|         printf -- "-A INPUT -j chain_%06x\n" $i
1dc35b
| done
1dc35b
| echo COMMIT
1dc35b
|
1dc35b
| )
1dc35b
| iptables-nft -X
1dc35b
1dc35b
The problem seems to be the sheer number of netlink error messages sent
1dc35b
back to user space (one EBUSY for each chain). To solve this, set the
1dc35b
receive buffer size depending on the number of commands sent to the kernel.
1dc35b
1dc35b
Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
1dc35b
Signed-off-by: Phil Sutter <phil@nwl.cc>
1dc35b
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1dc35b
(cherry picked from commit d3e39e9c457f452540359e42fb58d64a28fe3e18)
1dc35b
1dc35b
Conflicts:
1dc35b
	iptables/nft.c
1dc35b
-> Context change due to missing commits 17c282003f463
1dc35b
   ("nft: reset netlink sender buffer size of socket restart") and
1dc35b
   58d7de0181f61 ("xtables: handle concurrent ruleset modifications").
1dc35b
1dc35b
Signed-off-by: Phil Sutter <psutter@redhat.com>
1dc35b
---
1dc35b
 iptables/nft.c | 27 +++++++++++++++++++++++----
1dc35b
 1 file changed, 23 insertions(+), 4 deletions(-)
1dc35b
1dc35b
diff --git a/iptables/nft.c b/iptables/nft.c
1dc35b
index d708fb6176b88..87fdd1e66152d 100644
1dc35b
--- a/iptables/nft.c
1dc35b
+++ b/iptables/nft.c
1dc35b
@@ -170,8 +170,24 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl,
1dc35b
 	nlbuffsiz = newbuffsiz;
1dc35b
 }
1dc35b
 
1dc35b
+static int nlrcvbuffsiz;
1dc35b
+
1dc35b
+static void mnl_set_rcvbuffer(const struct mnl_socket *nl, int numcmds)
1dc35b
+{
1dc35b
+	int newbuffsiz = getpagesize() * numcmds;
1dc35b
+
1dc35b
+	if (newbuffsiz <= nlrcvbuffsiz)
1dc35b
+		return;
1dc35b
+
1dc35b
+	if (setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_RCVBUFFORCE,
1dc35b
+		       &newbuffsiz, sizeof(socklen_t)) < 0)
1dc35b
+		return;
1dc35b
+
1dc35b
+	nlrcvbuffsiz = newbuffsiz;
1dc35b
+}
1dc35b
+
1dc35b
 static ssize_t mnl_nft_socket_sendmsg(const struct mnl_socket *nf_sock,
1dc35b
-				      struct nftnl_batch *batch)
1dc35b
+				      struct nftnl_batch *batch, int numcmds)
1dc35b
 {
1dc35b
 	static const struct sockaddr_nl snl = {
1dc35b
 		.nl_family = AF_NETLINK
1dc35b
@@ -186,13 +202,15 @@ static ssize_t mnl_nft_socket_sendmsg(const struct mnl_socket *nf_sock,
1dc35b
 	};
1dc35b
 
1dc35b
 	mnl_set_sndbuffer(nf_sock, batch);
1dc35b
+	mnl_set_rcvbuffer(nf_sock, numcmds);
1dc35b
 	nftnl_batch_iovec(batch, iov, iov_len);
1dc35b
 
1dc35b
 	return sendmsg(mnl_socket_get_fd(nf_sock), &msg, 0);
1dc35b
 }
1dc35b
 
1dc35b
 static int mnl_batch_talk(const struct mnl_socket *nf_sock,
1dc35b
-			  struct nftnl_batch *batch, struct list_head *err_list)
1dc35b
+			  struct nftnl_batch *batch, int numcmds,
1dc35b
+			  struct list_head *err_list)
1dc35b
 {
1dc35b
 	const struct mnl_socket *nl = nf_sock;
1dc35b
 	int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl);
1dc35b
@@ -204,7 +222,7 @@ static int mnl_batch_talk(const struct mnl_socket *nf_sock,
1dc35b
 	};
1dc35b
 	int err = 0;
1dc35b
 
1dc35b
-	ret = mnl_nft_socket_sendmsg(nf_sock, batch);
1dc35b
+	ret = mnl_nft_socket_sendmsg(nf_sock, batch, numcmds);
1dc35b
 	if (ret == -1)
1dc35b
 		return -1;
1dc35b
 
1dc35b
@@ -748,6 +766,7 @@ static int nft_restart(struct nft_handle *h)
1dc35b
 		return -1;
1dc35b
 
1dc35b
 	h->portid = mnl_socket_get_portid(h->nl);
1dc35b
+	nlrcvbuffsiz = 0;
1dc35b
 
1dc35b
 	return 0;
1dc35b
 }
1dc35b
@@ -2728,7 +2747,7 @@ static int nft_action(struct nft_handle *h, int action)
1dc35b
 		break;
1dc35b
 	}
1dc35b
 
1dc35b
-	ret = mnl_batch_talk(h->nl, h->batch, &h->err_list);
1dc35b
+	ret = mnl_batch_talk(h->nl, h->batch, seq, &h->err_list);
1dc35b
 
1dc35b
 	i = 0;
1dc35b
 	buflen = sizeof(errmsg);
1dc35b
-- 
1dc35b
2.22.0
1dc35b