Skip to content

Commit 53de1e6

Browse files
Florian Westphalgregkh
authored andcommitted
netfilter: nf_tables: avoid chain re-validation if possible
[ Upstream commit 8e1a1bc4f5a42747c08130b8242ebebd1210b32f ] Hamza Mahfooz reports cpu soft lock-ups in nft_chain_validate(): watchdog: BUG: soft lockup - CPU#1 stuck for 27s! [iptables-nft-re:37547] [..] RIP: 0010:nft_chain_validate+0xcb/0x110 [nf_tables] [..] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_table_validate+0x6b/0xb0 [nf_tables] nf_tables_validate+0x8b/0xa0 [nf_tables] nf_tables_commit+0x1df/0x1eb0 [nf_tables] [..] Currently nf_tables will traverse the entire table (chain graph), starting from the entry points (base chains), exploring all possible paths (chain jumps). But there are cases where we could avoid revalidation. Consider: 1 input -> j2 -> j3 2 input -> j2 -> j3 3 input -> j1 -> j2 -> j3 Then the second rule does not need to revalidate j2, and, by extension j3, because this was already checked during validation of the first rule. We need to validate it only for rule 3. This is needed because chain loop detection also ensures we do not exceed the jump stack: Just because we know that j2 is cycle free, its last jump might now exceed the allowed stack size. We also need to update all reachable chains with the new largest observed call depth. Care has to be taken to revalidate even if the chain depth won't be an issue: chain validation also ensures that expressions are not called from invalid base chains. For example, the masquerade expression can only be called from NAT postrouting base chains. Therefore we also need to keep record of the base chain context (type, hooknum) and revalidate if the chain becomes reachable from a different hook location. Reported-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Closes: https://lore.kernel.org/netfilter-devel/20251118221735.GA5477@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/ Tested-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent c04b3a8 commit 53de1e6

2 files changed

Lines changed: 91 additions & 12 deletions

File tree

include/net/netfilter/nf_tables.h

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,29 @@ struct nft_rule_blob {
10791079
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
10801080
};
10811081

1082+
enum nft_chain_types {
1083+
NFT_CHAIN_T_DEFAULT = 0,
1084+
NFT_CHAIN_T_ROUTE,
1085+
NFT_CHAIN_T_NAT,
1086+
NFT_CHAIN_T_MAX
1087+
};
1088+
1089+
/**
1090+
* struct nft_chain_validate_state - validation state
1091+
*
1092+
* If a chain is encountered again during table validation it is
1093+
* possible to avoid revalidation provided the calling context is
1094+
* compatible. This structure stores relevant calling context of
1095+
* previous validations.
1096+
*
1097+
* @hook_mask: the hook numbers and locations the chain is linked to
1098+
* @depth: the deepest call chain level the chain is linked to
1099+
*/
1100+
struct nft_chain_validate_state {
1101+
u8 hook_mask[NFT_CHAIN_T_MAX];
1102+
u8 depth;
1103+
};
1104+
10821105
/**
10831106
* struct nft_chain - nf_tables chain
10841107
*
@@ -1097,6 +1120,7 @@ struct nft_rule_blob {
10971120
* @udlen: user data length
10981121
* @udata: user data in the chain
10991122
* @blob_next: rule blob pointer to the next in the chain
1123+
* @vstate: validation state
11001124
*/
11011125
struct nft_chain {
11021126
struct nft_rule_blob __rcu *blob_gen_0;
@@ -1116,23 +1140,17 @@ struct nft_chain {
11161140

11171141
/* Only used during control plane commit phase: */
11181142
struct nft_rule_blob *blob_next;
1143+
struct nft_chain_validate_state vstate;
11191144
};
11201145

1121-
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
1146+
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
11221147
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
11231148
const struct nft_set_iter *iter,
11241149
struct nft_set_elem *elem);
11251150
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
11261151
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
11271152
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
11281153

1129-
enum nft_chain_types {
1130-
NFT_CHAIN_T_DEFAULT = 0,
1131-
NFT_CHAIN_T_ROUTE,
1132-
NFT_CHAIN_T_NAT,
1133-
NFT_CHAIN_T_MAX
1134-
};
1135-
11361154
/**
11371155
* struct nft_chain_type - nf_tables chain type info
11381156
*

net/netfilter/nf_tables_api.c

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
121121

122122
table->validate_state = new_validate_state;
123123
}
124+
125+
static bool nft_chain_vstate_valid(const struct nft_ctx *ctx,
126+
const struct nft_chain *chain)
127+
{
128+
const struct nft_base_chain *base_chain;
129+
enum nft_chain_types type;
130+
u8 hooknum;
131+
132+
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
133+
return false;
134+
135+
base_chain = nft_base_chain(ctx->chain);
136+
hooknum = base_chain->ops.hooknum;
137+
type = base_chain->type->type;
138+
139+
/* chain is already validated for this call depth */
140+
if (chain->vstate.depth >= ctx->level &&
141+
chain->vstate.hook_mask[type] & BIT(hooknum))
142+
return true;
143+
144+
return false;
145+
}
146+
124147
static void nf_tables_trans_destroy_work(struct work_struct *w);
125148
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
126149

@@ -3798,6 +3821,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
37983821
nf_tables_rule_destroy(ctx, rule);
37993822
}
38003823

3824+
static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain)
3825+
{
3826+
const struct nft_base_chain *base_chain;
3827+
enum nft_chain_types type;
3828+
u8 hooknum;
3829+
3830+
/* ctx->chain must hold the calling base chain. */
3831+
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) {
3832+
memset(&chain->vstate, 0, sizeof(chain->vstate));
3833+
return;
3834+
}
3835+
3836+
base_chain = nft_base_chain(ctx->chain);
3837+
hooknum = base_chain->ops.hooknum;
3838+
type = base_chain->type->type;
3839+
3840+
BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX);
3841+
3842+
chain->vstate.hook_mask[type] |= BIT(hooknum);
3843+
if (chain->vstate.depth < ctx->level)
3844+
chain->vstate.depth = ctx->level;
3845+
}
3846+
38013847
/** nft_chain_validate - loop detection and hook validation
38023848
*
38033849
* @ctx: context containing call depth and base chain
@@ -3807,15 +3853,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
38073853
* and set lookups until either the jump limit is hit or all reachable
38083854
* chains have been validated.
38093855
*/
3810-
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
3856+
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
38113857
{
38123858
struct nft_expr *expr, *last;
38133859
struct nft_rule *rule;
38143860
int err;
38153861

3862+
BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
38163863
if (ctx->level == NFT_JUMP_STACK_SIZE)
38173864
return -EMLINK;
38183865

3866+
if (ctx->level > 0) {
3867+
/* jumps to base chains are not allowed. */
3868+
if (nft_is_base_chain(chain))
3869+
return -ELOOP;
3870+
3871+
if (nft_chain_vstate_valid(ctx, chain))
3872+
return 0;
3873+
}
3874+
38193875
list_for_each_entry(rule, &chain->rules, list) {
38203876
if (fatal_signal_pending(current))
38213877
return -EINTR;
@@ -3836,6 +3892,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
38363892
}
38373893
}
38383894

3895+
nft_chain_vstate_update(ctx, chain);
38393896
return 0;
38403897
}
38413898
EXPORT_SYMBOL_GPL(nft_chain_validate);
@@ -3847,7 +3904,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
38473904
.net = net,
38483905
.family = table->family,
38493906
};
3850-
int err;
3907+
int err = 0;
38513908

38523909
list_for_each_entry(chain, &table->chains, list) {
38533910
if (!nft_is_base_chain(chain))
@@ -3856,12 +3913,16 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
38563913
ctx.chain = chain;
38573914
err = nft_chain_validate(&ctx, chain);
38583915
if (err < 0)
3859-
return err;
3916+
goto err;
38603917

38613918
cond_resched();
38623919
}
38633920

3864-
return 0;
3921+
err:
3922+
list_for_each_entry(chain, &table->chains, list)
3923+
memset(&chain->vstate, 0, sizeof(chain->vstate));
3924+
3925+
return err;
38653926
}
38663927

38673928
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,

0 commit comments

Comments
 (0)