Blame SOURCES/0028-nft-Fix-for-concurrent-noflush-restore-calls.patch

9c35a8
From 575a1e5589f813af7e838c045863b510b4740353 Mon Sep 17 00:00:00 2001
9c35a8
From: Phil Sutter <phil@nwl.cc>
9c35a8
Date: Mon, 5 Oct 2020 16:06:49 +0200
9c35a8
Subject: [PATCH] nft: Fix for concurrent noflush restore calls
9c35a8
9c35a8
Transaction refresh was broken with regards to nft_chain_restore(): It
9c35a8
created a rule flush batch object only if the chain was found in cache
9c35a8
and a chain add object only if the chain was not found. Yet with
9c35a8
concurrent ruleset updates, one has to expect both situations:
9c35a8
9c35a8
* If a chain vanishes, the rule flush job must be skipped and instead
9c35a8
  the chain add job become active.
9c35a8
9c35a8
* If a chain appears, the chain add job must be skipped and instead
9c35a8
  rules flushed.
9c35a8
9c35a8
Change the code accordingly: Create both batch objects and set their
9c35a8
'skip' field depending on the situation in cache and adjust both in
9c35a8
nft_refresh_transaction().
9c35a8
9c35a8
As a side-effect, the implicit rule flush becomes explicit and all
9c35a8
handling of implicit batch jobs is dropped along with the related field
9c35a8
indicating such.
9c35a8
9c35a8
Reuse the 'implicit' parameter of __nft_rule_flush() to control the
9c35a8
initial 'skip' field value instead.
9c35a8
9c35a8
A subtle caveat is vanishing of existing chains: Creating the chain add
9c35a8
job based on the chain in cache causes a netlink message containing that
9c35a8
chain's handle which the kernel dislikes. Therefore unset the chain's
9c35a8
handle in that case.
9c35a8
9c35a8
Fixes: 58d7de0181f61 ("xtables: handle concurrent ruleset modifications")
9c35a8
Signed-off-by: Phil Sutter <phil@nwl.cc>
9c35a8
(cherry picked from commit dac904bdcd9a18aabafee7275ccf0c2bd53800f3)
9c35a8
9c35a8
Conflicts:
9c35a8
	iptables/nft.c
9c35a8
-> Upstream changed good/bad return codes of nft_chain_restore()
9c35a8
   function.
9c35a8
9c35a8
Signed-off-by: Phil Sutter <psutter@redhat.com>
9c35a8
---
9c35a8
 iptables/nft.c                                | 58 ++++++++++---------
9c35a8
 .../ipt-restore/0016-concurrent-restores_0    | 53 +++++++++++++++++
9c35a8
 2 files changed, 83 insertions(+), 28 deletions(-)
9c35a8
 create mode 100755 iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
9c35a8
9c35a8
diff --git a/iptables/nft.c b/iptables/nft.c
9c35a8
index d661ac2cafda6..dc5490c085364 100644
9c35a8
--- a/iptables/nft.c
9c35a8
+++ b/iptables/nft.c
9c35a8
@@ -283,7 +283,6 @@ struct obj_update {
9c35a8
 	struct list_head	head;
9c35a8
 	enum obj_update_type	type:8;
9c35a8
 	uint8_t			skip:1;
9c35a8
-	uint8_t			implicit:1;
9c35a8
 	unsigned int		seq;
9c35a8
 	union {
9c35a8
 		struct nftnl_table	*table;
9c35a8
@@ -1650,7 +1649,7 @@ int nft_rule_save(struct nft_handle *h, const char *table, unsigned int format)
9c35a8
 
9c35a8
 static void
9c35a8
 __nft_rule_flush(struct nft_handle *h, const char *table,
9c35a8
-		 const char *chain, bool verbose, bool implicit)
9c35a8
+		 const char *chain, bool verbose, bool skip)
9c35a8
 {
9c35a8
 	struct obj_update *obj;
9c35a8
 	struct nftnl_rule *r;
9c35a8
@@ -1672,7 +1671,7 @@ __nft_rule_flush(struct nft_handle *h, const char *table,
9c35a8
 		return;
9c35a8
 	}
9c35a8
 
9c35a8
-	obj->implicit = implicit;
9c35a8
+	obj->skip = skip;
9c35a8
 }
9c35a8
 
9c35a8
 int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
9c35a8
@@ -1768,17 +1767,12 @@ int nft_chain_user_add(struct nft_handle *h, const char *chain, const char *tabl
9c35a8
 int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table)
9c35a8
 {
9c35a8
 	struct nftnl_chain_list *list;
9c35a8
+	struct obj_update *obj;
9c35a8
 	struct nftnl_chain *c;
9c35a8
 	bool created = false;
9c35a8
 
9c35a8
 	c = nft_chain_find(h, table, chain);
9c35a8
-	if (c) {
9c35a8
-		/* Apparently -n still flushes existing user defined
9c35a8
-		 * chains that are redefined.
9c35a8
-		 */
9c35a8
-		if (h->noflush)
9c35a8
-			__nft_rule_flush(h, table, chain, false, true);
9c35a8
-	} else {
9c35a8
+	if (!c) {
9c35a8
 		c = nftnl_chain_alloc();
9c35a8
 		if (!c)
9c35a8
 			return -1;
9c35a8
@@ -1786,20 +1780,26 @@ int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table
9c35a8
 		nftnl_chain_set_str(c, NFTNL_CHAIN_TABLE, table);
9c35a8
 		nftnl_chain_set_str(c, NFTNL_CHAIN_NAME, chain);
9c35a8
 		created = true;
9c35a8
-	}
9c35a8
 
9c35a8
-	if (h->family == NFPROTO_BRIDGE)
9c35a8
-		nftnl_chain_set_u32(c, NFTNL_CHAIN_POLICY, NF_ACCEPT);
9c35a8
+		list = nft_chain_list_get(h, table, chain);
9c35a8
+		if (list)
9c35a8
+			nftnl_chain_list_add(c, list);
9c35a8
+	} else {
9c35a8
+		/* If the chain should vanish meanwhile, kernel genid changes
9c35a8
+		 * and the transaction is refreshed enabling the chain add
9c35a8
+		 * object. With the handle still set, kernel interprets it as a
9c35a8
+		 * chain replace job and errors since it is not found anymore.
9c35a8
+		 */
9c35a8
+		nftnl_chain_unset(c, NFTNL_CHAIN_HANDLE);
9c35a8
+	}
9c35a8
 
9c35a8
-	if (!created)
9c35a8
-		return 0;
9c35a8
+	__nft_rule_flush(h, table, chain, false, created);
9c35a8
 
9c35a8
-	if (!batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c))
9c35a8
+	obj = batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c);
9c35a8
+	if (!obj)
9c35a8
 		return -1;
9c35a8
 
9c35a8
-	list = nft_chain_list_get(h, table, chain);
9c35a8
-	if (list)
9c35a8
-		nftnl_chain_list_add(c, list);
9c35a8
+	obj->skip = !created;
9c35a8
 
9c35a8
 	return 0;
9c35a8
 }
9c35a8
@@ -2693,11 +2693,6 @@ static void nft_refresh_transaction(struct nft_handle *h)
9c35a8
 	h->error.lineno = 0;
9c35a8
 
9c35a8
 	list_for_each_entry_safe(n, tmp, &h->obj_list, head) {
9c35a8
-		if (n->implicit) {
9c35a8
-			batch_obj_del(h, n);
9c35a8
-			continue;
9c35a8
-		}
9c35a8
-
9c35a8
 		switch (n->type) {
9c35a8
 		case NFT_COMPAT_TABLE_FLUSH:
9c35a8
 			tablename = nftnl_table_get_str(n->table, NFTNL_TABLE_NAME);
9c35a8
@@ -2723,14 +2718,22 @@ static void nft_refresh_transaction(struct nft_handle *h)
9c35a8
 
9c35a8
 			c = nft_chain_find(h, tablename, chainname);
9c35a8
 			if (c) {
9c35a8
-				/* -restore -n flushes existing rules from redefined user-chain */
9c35a8
-				__nft_rule_flush(h, tablename,
9c35a8
-						 chainname, false, true);
9c35a8
 				n->skip = 1;
9c35a8
 			} else if (!c) {
9c35a8
 				n->skip = 0;
9c35a8
 			}
9c35a8
 			break;
9c35a8
+		case NFT_COMPAT_RULE_FLUSH:
9c35a8
+			tablename = nftnl_rule_get_str(n->rule, NFTNL_RULE_TABLE);
9c35a8
+			if (!tablename)
9c35a8
+				continue;
9c35a8
+
9c35a8
+			chainname = nftnl_rule_get_str(n->rule, NFTNL_RULE_CHAIN);
9c35a8
+			if (!chainname)
9c35a8
+				continue;
9c35a8
+
9c35a8
+			n->skip = !nft_chain_find(h, tablename, chainname);
9c35a8
+			break;
9c35a8
 		case NFT_COMPAT_TABLE_ADD:
9c35a8
 		case NFT_COMPAT_CHAIN_ADD:
9c35a8
 		case NFT_COMPAT_CHAIN_ZERO:
9c35a8
@@ -2742,7 +2745,6 @@ static void nft_refresh_transaction(struct nft_handle *h)
9c35a8
 		case NFT_COMPAT_RULE_INSERT:
9c35a8
 		case NFT_COMPAT_RULE_REPLACE:
9c35a8
 		case NFT_COMPAT_RULE_DELETE:
9c35a8
-		case NFT_COMPAT_RULE_FLUSH:
9c35a8
 		case NFT_COMPAT_SET_ADD:
9c35a8
 			break;
9c35a8
 		}
9c35a8
diff --git a/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0 b/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
9c35a8
new file mode 100755
9c35a8
index 0000000000000..53ec12fa368af
9c35a8
--- /dev/null
9c35a8
+++ b/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
9c35a8
@@ -0,0 +1,53 @@
9c35a8
+#!/bin/bash
9c35a8
+
9c35a8
+set -e
9c35a8
+
9c35a8
+RS="*filter
9c35a8
+:INPUT ACCEPT [12024:3123388]
9c35a8
+:FORWARD ACCEPT [0:0]
9c35a8
+:OUTPUT ACCEPT [12840:2144421]
9c35a8
+:FOO - [0:0]
9c35a8
+:BAR0 - [0:0]
9c35a8
+:BAR1 - [0:0]
9c35a8
+:BAR2 - [0:0]
9c35a8
+:BAR3 - [0:0]
9c35a8
+:BAR4 - [0:0]
9c35a8
+:BAR5 - [0:0]
9c35a8
+:BAR6 - [0:0]
9c35a8
+:BAR7 - [0:0]
9c35a8
+:BAR8 - [0:0]
9c35a8
+:BAR9 - [0:0]
9c35a8
+"
9c35a8
+
9c35a8
+RS1="$RS
9c35a8
+-X BAR3
9c35a8
+-X BAR6
9c35a8
+-X BAR9
9c35a8
+-A FOO -s 9.9.0.1/32 -j BAR1
9c35a8
+-A FOO -s 9.9.0.2/32 -j BAR2
9c35a8
+-A FOO -s 9.9.0.4/32 -j BAR4
9c35a8
+-A FOO -s 9.9.0.5/32 -j BAR5
9c35a8
+-A FOO -s 9.9.0.7/32 -j BAR7
9c35a8
+-A FOO -s 9.9.0.8/32 -j BAR8
9c35a8
+COMMIT
9c35a8
+"
9c35a8
+
9c35a8
+RS2="$RS
9c35a8
+-X BAR2
9c35a8
+-X BAR5
9c35a8
+-X BAR7
9c35a8
+-A FOO -s 9.9.0.1/32 -j BAR1
9c35a8
+-A FOO -s 9.9.0.3/32 -j BAR3
9c35a8
+-A FOO -s 9.9.0.4/32 -j BAR4
9c35a8
+-A FOO -s 9.9.0.6/32 -j BAR6
9c35a8
+-A FOO -s 9.9.0.8/32 -j BAR8
9c35a8
+-A FOO -s 9.9.0.9/32 -j BAR9
9c35a8
+COMMIT
9c35a8
+"
9c35a8
+
9c35a8
+for n in $(seq 1 10); do
9c35a8
+	$XT_MULTI iptables-restore --noflush -w <<< "$RS1" &
9c35a8
+	$XT_MULTI iptables-restore --noflush -w <<< "$RS2" &
9c35a8
+	wait -n
9c35a8
+	wait -n
9c35a8
+done
9c35a8
-- 
9c35a8
2.28.0
9c35a8