|
|
9c35a8 |
From 575a1e5589f813af7e838c045863b510b4740353 Mon Sep 17 00:00:00 2001
|
|
|
9c35a8 |
From: Phil Sutter <phil@nwl.cc>
|
|
|
9c35a8 |
Date: Mon, 5 Oct 2020 16:06:49 +0200
|
|
|
9c35a8 |
Subject: [PATCH] nft: Fix for concurrent noflush restore calls
|
|
|
9c35a8 |
|
|
|
9c35a8 |
Transaction refresh was broken with regards to nft_chain_restore(): It
|
|
|
9c35a8 |
created a rule flush batch object only if the chain was found in cache
|
|
|
9c35a8 |
and a chain add object only if the chain was not found. Yet with
|
|
|
9c35a8 |
concurrent ruleset updates, one has to expect both situations:
|
|
|
9c35a8 |
|
|
|
9c35a8 |
* If a chain vanishes, the rule flush job must be skipped and instead
|
|
|
9c35a8 |
the chain add job become active.
|
|
|
9c35a8 |
|
|
|
9c35a8 |
* If a chain appears, the chain add job must be skipped and instead
|
|
|
9c35a8 |
rules flushed.
|
|
|
9c35a8 |
|
|
|
9c35a8 |
Change the code accordingly: Create both batch objects and set their
|
|
|
9c35a8 |
'skip' field depending on the situation in cache and adjust both in
|
|
|
9c35a8 |
nft_refresh_transaction().
|
|
|
9c35a8 |
|
|
|
9c35a8 |
As a side-effect, the implicit rule flush becomes explicit and all
|
|
|
9c35a8 |
handling of implicit batch jobs is dropped along with the related field
|
|
|
9c35a8 |
indicating such.
|
|
|
9c35a8 |
|
|
|
9c35a8 |
Reuse the 'implicit' parameter of __nft_rule_flush() to control the
|
|
|
9c35a8 |
initial 'skip' field value instead.
|
|
|
9c35a8 |
|
|
|
9c35a8 |
A subtle caveat is vanishing of existing chains: Creating the chain add
|
|
|
9c35a8 |
job based on the chain in cache causes a netlink message containing that
|
|
|
9c35a8 |
chain's handle which the kernel dislikes. Therefore unset the chain's
|
|
|
9c35a8 |
handle in that case.
|
|
|
9c35a8 |
|
|
|
9c35a8 |
Fixes: 58d7de0181f61 ("xtables: handle concurrent ruleset modifications")
|
|
|
9c35a8 |
Signed-off-by: Phil Sutter <phil@nwl.cc>
|
|
|
9c35a8 |
(cherry picked from commit dac904bdcd9a18aabafee7275ccf0c2bd53800f3)
|
|
|
9c35a8 |
|
|
|
9c35a8 |
Conflicts:
|
|
|
9c35a8 |
iptables/nft.c
|
|
|
9c35a8 |
-> Upstream changed good/bad return codes of nft_chain_restore()
|
|
|
9c35a8 |
function.
|
|
|
9c35a8 |
|
|
|
9c35a8 |
Signed-off-by: Phil Sutter <psutter@redhat.com>
|
|
|
9c35a8 |
---
|
|
|
9c35a8 |
iptables/nft.c | 58 ++++++++++---------
|
|
|
9c35a8 |
.../ipt-restore/0016-concurrent-restores_0 | 53 +++++++++++++++++
|
|
|
9c35a8 |
2 files changed, 83 insertions(+), 28 deletions(-)
|
|
|
9c35a8 |
create mode 100755 iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
|
|
|
9c35a8 |
|
|
|
9c35a8 |
diff --git a/iptables/nft.c b/iptables/nft.c
|
|
|
9c35a8 |
index d661ac2cafda6..dc5490c085364 100644
|
|
|
9c35a8 |
--- a/iptables/nft.c
|
|
|
9c35a8 |
+++ b/iptables/nft.c
|
|
|
9c35a8 |
@@ -283,7 +283,6 @@ struct obj_update {
|
|
|
9c35a8 |
struct list_head head;
|
|
|
9c35a8 |
enum obj_update_type type:8;
|
|
|
9c35a8 |
uint8_t skip:1;
|
|
|
9c35a8 |
- uint8_t implicit:1;
|
|
|
9c35a8 |
unsigned int seq;
|
|
|
9c35a8 |
union {
|
|
|
9c35a8 |
struct nftnl_table *table;
|
|
|
9c35a8 |
@@ -1650,7 +1649,7 @@ int nft_rule_save(struct nft_handle *h, const char *table, unsigned int format)
|
|
|
9c35a8 |
|
|
|
9c35a8 |
static void
|
|
|
9c35a8 |
__nft_rule_flush(struct nft_handle *h, const char *table,
|
|
|
9c35a8 |
- const char *chain, bool verbose, bool implicit)
|
|
|
9c35a8 |
+ const char *chain, bool verbose, bool skip)
|
|
|
9c35a8 |
{
|
|
|
9c35a8 |
struct obj_update *obj;
|
|
|
9c35a8 |
struct nftnl_rule *r;
|
|
|
9c35a8 |
@@ -1672,7 +1671,7 @@ __nft_rule_flush(struct nft_handle *h, const char *table,
|
|
|
9c35a8 |
return;
|
|
|
9c35a8 |
}
|
|
|
9c35a8 |
|
|
|
9c35a8 |
- obj->implicit = implicit;
|
|
|
9c35a8 |
+ obj->skip = skip;
|
|
|
9c35a8 |
}
|
|
|
9c35a8 |
|
|
|
9c35a8 |
int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
|
|
|
9c35a8 |
@@ -1768,17 +1767,12 @@ int nft_chain_user_add(struct nft_handle *h, const char *chain, const char *tabl
|
|
|
9c35a8 |
int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table)
|
|
|
9c35a8 |
{
|
|
|
9c35a8 |
struct nftnl_chain_list *list;
|
|
|
9c35a8 |
+ struct obj_update *obj;
|
|
|
9c35a8 |
struct nftnl_chain *c;
|
|
|
9c35a8 |
bool created = false;
|
|
|
9c35a8 |
|
|
|
9c35a8 |
c = nft_chain_find(h, table, chain);
|
|
|
9c35a8 |
- if (c) {
|
|
|
9c35a8 |
- /* Apparently -n still flushes existing user defined
|
|
|
9c35a8 |
- * chains that are redefined.
|
|
|
9c35a8 |
- */
|
|
|
9c35a8 |
- if (h->noflush)
|
|
|
9c35a8 |
- __nft_rule_flush(h, table, chain, false, true);
|
|
|
9c35a8 |
- } else {
|
|
|
9c35a8 |
+ if (!c) {
|
|
|
9c35a8 |
c = nftnl_chain_alloc();
|
|
|
9c35a8 |
if (!c)
|
|
|
9c35a8 |
return -1;
|
|
|
9c35a8 |
@@ -1786,20 +1780,26 @@ int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table
|
|
|
9c35a8 |
nftnl_chain_set_str(c, NFTNL_CHAIN_TABLE, table);
|
|
|
9c35a8 |
nftnl_chain_set_str(c, NFTNL_CHAIN_NAME, chain);
|
|
|
9c35a8 |
created = true;
|
|
|
9c35a8 |
- }
|
|
|
9c35a8 |
|
|
|
9c35a8 |
- if (h->family == NFPROTO_BRIDGE)
|
|
|
9c35a8 |
- nftnl_chain_set_u32(c, NFTNL_CHAIN_POLICY, NF_ACCEPT);
|
|
|
9c35a8 |
+ list = nft_chain_list_get(h, table, chain);
|
|
|
9c35a8 |
+ if (list)
|
|
|
9c35a8 |
+ nftnl_chain_list_add(c, list);
|
|
|
9c35a8 |
+ } else {
|
|
|
9c35a8 |
+ /* If the chain should vanish meanwhile, kernel genid changes
|
|
|
9c35a8 |
+ * and the transaction is refreshed enabling the chain add
|
|
|
9c35a8 |
+ * object. With the handle still set, kernel interprets it as a
|
|
|
9c35a8 |
+ * chain replace job and errors since it is not found anymore.
|
|
|
9c35a8 |
+ */
|
|
|
9c35a8 |
+ nftnl_chain_unset(c, NFTNL_CHAIN_HANDLE);
|
|
|
9c35a8 |
+ }
|
|
|
9c35a8 |
|
|
|
9c35a8 |
- if (!created)
|
|
|
9c35a8 |
- return 0;
|
|
|
9c35a8 |
+ __nft_rule_flush(h, table, chain, false, created);
|
|
|
9c35a8 |
|
|
|
9c35a8 |
- if (!batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c))
|
|
|
9c35a8 |
+ obj = batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c);
|
|
|
9c35a8 |
+ if (!obj)
|
|
|
9c35a8 |
return -1;
|
|
|
9c35a8 |
|
|
|
9c35a8 |
- list = nft_chain_list_get(h, table, chain);
|
|
|
9c35a8 |
- if (list)
|
|
|
9c35a8 |
- nftnl_chain_list_add(c, list);
|
|
|
9c35a8 |
+ obj->skip = !created;
|
|
|
9c35a8 |
|
|
|
9c35a8 |
return 0;
|
|
|
9c35a8 |
}
|
|
|
9c35a8 |
@@ -2693,11 +2693,6 @@ static void nft_refresh_transaction(struct nft_handle *h)
|
|
|
9c35a8 |
h->error.lineno = 0;
|
|
|
9c35a8 |
|
|
|
9c35a8 |
list_for_each_entry_safe(n, tmp, &h->obj_list, head) {
|
|
|
9c35a8 |
- if (n->implicit) {
|
|
|
9c35a8 |
- batch_obj_del(h, n);
|
|
|
9c35a8 |
- continue;
|
|
|
9c35a8 |
- }
|
|
|
9c35a8 |
-
|
|
|
9c35a8 |
switch (n->type) {
|
|
|
9c35a8 |
case NFT_COMPAT_TABLE_FLUSH:
|
|
|
9c35a8 |
tablename = nftnl_table_get_str(n->table, NFTNL_TABLE_NAME);
|
|
|
9c35a8 |
@@ -2723,14 +2718,22 @@ static void nft_refresh_transaction(struct nft_handle *h)
|
|
|
9c35a8 |
|
|
|
9c35a8 |
c = nft_chain_find(h, tablename, chainname);
|
|
|
9c35a8 |
if (c) {
|
|
|
9c35a8 |
- /* -restore -n flushes existing rules from redefined user-chain */
|
|
|
9c35a8 |
- __nft_rule_flush(h, tablename,
|
|
|
9c35a8 |
- chainname, false, true);
|
|
|
9c35a8 |
n->skip = 1;
|
|
|
9c35a8 |
} else if (!c) {
|
|
|
9c35a8 |
n->skip = 0;
|
|
|
9c35a8 |
}
|
|
|
9c35a8 |
break;
|
|
|
9c35a8 |
+ case NFT_COMPAT_RULE_FLUSH:
|
|
|
9c35a8 |
+ tablename = nftnl_rule_get_str(n->rule, NFTNL_RULE_TABLE);
|
|
|
9c35a8 |
+ if (!tablename)
|
|
|
9c35a8 |
+ continue;
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+ chainname = nftnl_rule_get_str(n->rule, NFTNL_RULE_CHAIN);
|
|
|
9c35a8 |
+ if (!chainname)
|
|
|
9c35a8 |
+ continue;
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+ n->skip = !nft_chain_find(h, tablename, chainname);
|
|
|
9c35a8 |
+ break;
|
|
|
9c35a8 |
case NFT_COMPAT_TABLE_ADD:
|
|
|
9c35a8 |
case NFT_COMPAT_CHAIN_ADD:
|
|
|
9c35a8 |
case NFT_COMPAT_CHAIN_ZERO:
|
|
|
9c35a8 |
@@ -2742,7 +2745,6 @@ static void nft_refresh_transaction(struct nft_handle *h)
|
|
|
9c35a8 |
case NFT_COMPAT_RULE_INSERT:
|
|
|
9c35a8 |
case NFT_COMPAT_RULE_REPLACE:
|
|
|
9c35a8 |
case NFT_COMPAT_RULE_DELETE:
|
|
|
9c35a8 |
- case NFT_COMPAT_RULE_FLUSH:
|
|
|
9c35a8 |
case NFT_COMPAT_SET_ADD:
|
|
|
9c35a8 |
break;
|
|
|
9c35a8 |
}
|
|
|
9c35a8 |
diff --git a/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0 b/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
|
|
|
9c35a8 |
new file mode 100755
|
|
|
9c35a8 |
index 0000000000000..53ec12fa368af
|
|
|
9c35a8 |
--- /dev/null
|
|
|
9c35a8 |
+++ b/iptables/tests/shell/testcases/ipt-restore/0016-concurrent-restores_0
|
|
|
9c35a8 |
@@ -0,0 +1,53 @@
|
|
|
9c35a8 |
+#!/bin/bash
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+set -e
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+RS="*filter
|
|
|
9c35a8 |
+:INPUT ACCEPT [12024:3123388]
|
|
|
9c35a8 |
+:FORWARD ACCEPT [0:0]
|
|
|
9c35a8 |
+:OUTPUT ACCEPT [12840:2144421]
|
|
|
9c35a8 |
+:FOO - [0:0]
|
|
|
9c35a8 |
+:BAR0 - [0:0]
|
|
|
9c35a8 |
+:BAR1 - [0:0]
|
|
|
9c35a8 |
+:BAR2 - [0:0]
|
|
|
9c35a8 |
+:BAR3 - [0:0]
|
|
|
9c35a8 |
+:BAR4 - [0:0]
|
|
|
9c35a8 |
+:BAR5 - [0:0]
|
|
|
9c35a8 |
+:BAR6 - [0:0]
|
|
|
9c35a8 |
+:BAR7 - [0:0]
|
|
|
9c35a8 |
+:BAR8 - [0:0]
|
|
|
9c35a8 |
+:BAR9 - [0:0]
|
|
|
9c35a8 |
+"
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+RS1="$RS
|
|
|
9c35a8 |
+-X BAR3
|
|
|
9c35a8 |
+-X BAR6
|
|
|
9c35a8 |
+-X BAR9
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.1/32 -j BAR1
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.2/32 -j BAR2
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.4/32 -j BAR4
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.5/32 -j BAR5
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.7/32 -j BAR7
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.8/32 -j BAR8
|
|
|
9c35a8 |
+COMMIT
|
|
|
9c35a8 |
+"
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+RS2="$RS
|
|
|
9c35a8 |
+-X BAR2
|
|
|
9c35a8 |
+-X BAR5
|
|
|
9c35a8 |
+-X BAR7
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.1/32 -j BAR1
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.3/32 -j BAR3
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.4/32 -j BAR4
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.6/32 -j BAR6
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.8/32 -j BAR8
|
|
|
9c35a8 |
+-A FOO -s 9.9.0.9/32 -j BAR9
|
|
|
9c35a8 |
+COMMIT
|
|
|
9c35a8 |
+"
|
|
|
9c35a8 |
+
|
|
|
9c35a8 |
+for n in $(seq 1 10); do
|
|
|
9c35a8 |
+ $XT_MULTI iptables-restore --noflush -w <<< "$RS1" &
|
|
|
9c35a8 |
+ $XT_MULTI iptables-restore --noflush -w <<< "$RS2" &
|
|
|
9c35a8 |
+ wait -n
|
|
|
9c35a8 |
+ wait -n
|
|
|
9c35a8 |
+done
|
|
|
9c35a8 |
--
|
|
|
9c35a8 |
2.28.0
|
|
|
9c35a8 |
|