Blame SOURCES/0028-nft-Fix-for-concurrent-noflush-restore-calls.patch

6ef880
From 575a1e5589f813af7e838c045863b510b4740353 Mon Sep 17 00:00:00 2001
6ef880
From: Phil Sutter <phil@nwl.cc>
6ef880
Date: Mon, 5 Oct 2020 16:06:49 +0200
6ef880
Subject: [PATCH] nft: Fix for concurrent noflush restore calls
6ef880
6ef880
Transaction refresh was broken with regards to nft_chain_restore(): It
6ef880
created a rule flush batch object only if the chain was found in cache
6ef880
and a chain add object only if the chain was not found. Yet with
6ef880
concurrent ruleset updates, one has to expect both situations:
6ef880
6ef880
* If a chain vanishes, the rule flush job must be skipped and instead
6ef880
  the chain add job become active.
6ef880
6ef880
* If a chain appears, the chain add job must be skipped and instead
6ef880
  rules flushed.
6ef880
6ef880
Change the code accordingly: Create both batch objects and set their
6ef880
'skip' field depending on the situation in cache and adjust both in
6ef880
nft_refresh_transaction().
6ef880
6ef880
As a side-effect, the implicit rule flush becomes explicit and all
6ef880
handling of implicit batch jobs is dropped along with the related field
6ef880
indicating such.
6ef880
6ef880
Reuse the 'implicit' parameter of __nft_rule_flush() to control the
6ef880
initial 'skip' field value instead.
6ef880
6ef880
A subtle caveat is vanishing of existing chains: Creating the chain add
6ef880
job based on the chain in cache causes a netlink message containing that
6ef880
chain's handle which the kernel dislikes. Therefore unset the chain's
6ef880
handle in that case.
6ef880
6ef880
Fixes: 58d7de0181f61 ("xtables: handle concurrent ruleset modifications")
6ef880
Signed-off-by: Phil Sutter <phil@nwl.cc>
6ef880
(cherry picked from commit dac904bdcd9a18aabafee7275ccf0c2bd53800f3)
6ef880
6ef880
Conflicts:
6ef880
	iptables/nft.c
6ef880
-> Upstream changed good/bad return codes of nft_chain_restore()
6ef880
   function.
6ef880
6ef880
Signed-off-by: Phil Sutter <psutter@redhat.com>
6ef880
---
6ef880
 iptables/nft.c                                | 58 ++++++++++---------
6ef880
 .../ipt-restore/0016-concurrent-restores_0    | 53 +++++++++++++++++
6ef880
 2 files changed, 83 insertions(+), 28 deletions(-)
6ef880
 create mode 100755 iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
6ef880
6ef880
diff --git a/iptables/nft.c b/iptables/nft.c
6ef880
index d661ac2cafda6..dc5490c085364 100644
6ef880
--- a/iptables/nft.c
6ef880
+++ b/iptables/nft.c
6ef880
@@ -283,7 +283,6 @@ struct obj_update {
6ef880
 	struct list_head	head;
6ef880
 	enum obj_update_type	type:8;
6ef880
 	uint8_t			skip:1;
6ef880
-	uint8_t			implicit:1;
6ef880
 	unsigned int		seq;
6ef880
 	union {
6ef880
 		struct nftnl_table	*table;
6ef880
@@ -1650,7 +1649,7 @@ int nft_rule_save(struct nft_handle *h, const char *table, unsigned int format)
6ef880
 
6ef880
 static void
6ef880
 __nft_rule_flush(struct nft_handle *h, const char *table,
6ef880
-		 const char *chain, bool verbose, bool implicit)
6ef880
+		 const char *chain, bool verbose, bool skip)
6ef880
 {
6ef880
 	struct obj_update *obj;
6ef880
 	struct nftnl_rule *r;
6ef880
@@ -1672,7 +1671,7 @@ __nft_rule_flush(struct nft_handle *h, const char *table,
6ef880
 		return;
6ef880
 	}
6ef880
 
6ef880
-	obj->implicit = implicit;
6ef880
+	obj->skip = skip;
6ef880
 }
6ef880
 
6ef880
 int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
6ef880
@@ -1768,17 +1767,12 @@ int nft_chain_user_add(struct nft_handle *h, const char *chain, const char *tabl
6ef880
 int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table)
6ef880
 {
6ef880
 	struct nftnl_chain_list *list;
6ef880
+	struct obj_update *obj;
6ef880
 	struct nftnl_chain *c;
6ef880
 	bool created = false;
6ef880
 
6ef880
 	c = nft_chain_find(h, table, chain);
6ef880
-	if (c) {
6ef880
-		/* Apparently -n still flushes existing user defined
6ef880
-		 * chains that are redefined.
6ef880
-		 */
6ef880
-		if (h->noflush)
6ef880
-			__nft_rule_flush(h, table, chain, false, true);
6ef880
-	} else {
6ef880
+	if (!c) {
6ef880
 		c = nftnl_chain_alloc();
6ef880
 		if (!c)
6ef880
 			return -1;
6ef880
@@ -1786,20 +1780,26 @@ int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table
6ef880
 		nftnl_chain_set_str(c, NFTNL_CHAIN_TABLE, table);
6ef880
 		nftnl_chain_set_str(c, NFTNL_CHAIN_NAME, chain);
6ef880
 		created = true;
6ef880
-	}
6ef880
 
6ef880
-	if (h->family == NFPROTO_BRIDGE)
6ef880
-		nftnl_chain_set_u32(c, NFTNL_CHAIN_POLICY, NF_ACCEPT);
6ef880
+		list = nft_chain_list_get(h, table, chain);
6ef880
+		if (list)
6ef880
+			nftnl_chain_list_add(c, list);
6ef880
+	} else {
6ef880
+		/* If the chain should vanish meanwhile, kernel genid changes
6ef880
+		 * and the transaction is refreshed enabling the chain add
6ef880
+		 * object. With the handle still set, kernel interprets it as a
6ef880
+		 * chain replace job and errors since it is not found anymore.
6ef880
+		 */
6ef880
+		nftnl_chain_unset(c, NFTNL_CHAIN_HANDLE);
6ef880
+	}
6ef880
 
6ef880
-	if (!created)
6ef880
-		return 0;
6ef880
+	__nft_rule_flush(h, table, chain, false, created);
6ef880
 
6ef880
-	if (!batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c))
6ef880
+	obj = batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c);
6ef880
+	if (!obj)
6ef880
 		return -1;
6ef880
 
6ef880
-	list = nft_chain_list_get(h, table, chain);
6ef880
-	if (list)
6ef880
-		nftnl_chain_list_add(c, list);
6ef880
+	obj->skip = !created;
6ef880
 
6ef880
 	return 0;
6ef880
 }
6ef880
@@ -2693,11 +2693,6 @@ static void nft_refresh_transaction(struct nft_handle *h)
6ef880
 	h->error.lineno = 0;
6ef880
 
6ef880
 	list_for_each_entry_safe(n, tmp, &h->obj_list, head) {
6ef880
-		if (n->implicit) {
6ef880
-			batch_obj_del(h, n);
6ef880
-			continue;
6ef880
-		}
6ef880
-
6ef880
 		switch (n->type) {
6ef880
 		case NFT_COMPAT_TABLE_FLUSH:
6ef880
 			tablename = nftnl_table_get_str(n->table, NFTNL_TABLE_NAME);
6ef880
@@ -2723,14 +2718,22 @@ static void nft_refresh_transaction(struct nft_handle *h)
6ef880
 
6ef880
 			c = nft_chain_find(h, tablename, chainname);
6ef880
 			if (c) {
6ef880
-				/* -restore -n flushes existing rules from redefined user-chain */
6ef880
-				__nft_rule_flush(h, tablename,
6ef880
-						 chainname, false, true);
6ef880
 				n->skip = 1;
6ef880
 			} else if (!c) {
6ef880
 				n->skip = 0;
6ef880
 			}
6ef880
 			break;
6ef880
+		case NFT_COMPAT_RULE_FLUSH:
6ef880
+			tablename = nftnl_rule_get_str(n->rule, NFTNL_RULE_TABLE);
6ef880
+			if (!tablename)
6ef880
+				continue;
6ef880
+
6ef880
+			chainname = nftnl_rule_get_str(n->rule, NFTNL_RULE_CHAIN);
6ef880
+			if (!chainname)
6ef880
+				continue;
6ef880
+
6ef880
+			n->skip = !nft_chain_find(h, tablename, chainname);
6ef880
+			break;
6ef880
 		case NFT_COMPAT_TABLE_ADD:
6ef880
 		case NFT_COMPAT_CHAIN_ADD:
6ef880
 		case NFT_COMPAT_CHAIN_ZERO:
6ef880
@@ -2742,7 +2745,6 @@ static void nft_refresh_transaction(struct nft_handle *h)
6ef880
 		case NFT_COMPAT_RULE_INSERT:
6ef880
 		case NFT_COMPAT_RULE_REPLACE:
6ef880
 		case NFT_COMPAT_RULE_DELETE:
6ef880
-		case NFT_COMPAT_RULE_FLUSH:
6ef880
 		case NFT_COMPAT_SET_ADD:
6ef880
 			break;
6ef880
 		}
6ef880
diff --git a/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0 b/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
6ef880
new file mode 100755
6ef880
index 0000000000000..53ec12fa368af
6ef880
--- /dev/null
6ef880
+++ b/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
6ef880
@@ -0,0 +1,53 @@
6ef880
+#!/bin/bash
6ef880
+
6ef880
+set -e
6ef880
+
6ef880
+RS="*filter
6ef880
+:INPUT ACCEPT [12024:3123388]
6ef880
+:FORWARD ACCEPT [0:0]
6ef880
+:OUTPUT ACCEPT [12840:2144421]
6ef880
+:FOO - [0:0]
6ef880
+:BAR0 - [0:0]
6ef880
+:BAR1 - [0:0]
6ef880
+:BAR2 - [0:0]
6ef880
+:BAR3 - [0:0]
6ef880
+:BAR4 - [0:0]
6ef880
+:BAR5 - [0:0]
6ef880
+:BAR6 - [0:0]
6ef880
+:BAR7 - [0:0]
6ef880
+:BAR8 - [0:0]
6ef880
+:BAR9 - [0:0]
6ef880
+"
6ef880
+
6ef880
+RS1="$RS
6ef880
+-X BAR3
6ef880
+-X BAR6
6ef880
+-X BAR9
6ef880
+-A FOO -s 9.9.0.1/32 -j BAR1
6ef880
+-A FOO -s 9.9.0.2/32 -j BAR2
6ef880
+-A FOO -s 9.9.0.4/32 -j BAR4
6ef880
+-A FOO -s 9.9.0.5/32 -j BAR5
6ef880
+-A FOO -s 9.9.0.7/32 -j BAR7
6ef880
+-A FOO -s 9.9.0.8/32 -j BAR8
6ef880
+COMMIT
6ef880
+"
6ef880
+
6ef880
+RS2="$RS
6ef880
+-X BAR2
6ef880
+-X BAR5
6ef880
+-X BAR7
6ef880
+-A FOO -s 9.9.0.1/32 -j BAR1
6ef880
+-A FOO -s 9.9.0.3/32 -j BAR3
6ef880
+-A FOO -s 9.9.0.4/32 -j BAR4
6ef880
+-A FOO -s 9.9.0.6/32 -j BAR6
6ef880
+-A FOO -s 9.9.0.8/32 -j BAR8
6ef880
+-A FOO -s 9.9.0.9/32 -j BAR9
6ef880
+COMMIT
6ef880
+"
6ef880
+
6ef880
+for n in $(seq 1 10); do
6ef880
+	$XT_MULTI iptables-restore --noflush -w <<< "$RS1" &
6ef880
+	$XT_MULTI iptables-restore --noflush -w <<< "$RS2" &
6ef880
+	wait -n
6ef880
+	wait -n
6ef880
+done
6ef880
-- 
6ef880
2.28.0
6ef880