From 3840538ad384fb7891adeeaf36624f870c51fc0e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 18 Jul 2017 14:56:17 +0200 Subject: [PATCH 1/5] netfilter: ipt_CLUSTERIP: fix use-after-free of proc entry When we delete a netns with a CLUSTERIP rule, clusterip_net_exit() is called first, removing /proc/net/ipt_CLUSTERIP. Then clusterip_config_entry_put() is called from clusterip_tg_destroy(), and tries to remove its entry under /proc/net/ipt_CLUSTERIP/. Fix this by checking that the parent directory of the entry to remove hasn't already been deleted. The following triggers a KASAN splat (stealing the reproducer from 202f59afd441, thanks to Jianlin Shi and Xin Long): ip netns add test ip link add veth0_in type veth peer name veth0_out ip link set veth0_in netns test ip netns exec test ip link set lo up ip netns exec test ip link set veth0_in up ip netns exec test iptables -I INPUT -d 1.2.3.4 -i veth0_in -j \ CLUSTERIP --new --clustermac 89:d4:47:eb:9a:fa --total-nodes 3 \ --local-node 1 --hashmode sourceip-sourceport ip netns del test Fixes: ce4ff76c15a8 ("netfilter: ipt_CLUSTERIP: make proc directory per net namespace") Signed-off-by: Sabrina Dubroca Reviewed-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d72decb80f9..efaa04dcc80e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -117,7 +117,8 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ #ifdef CONFIG_PROC_FS - proc_remove(c->pde); + if (cn->procdir) + proc_remove(c->pde); #endif return; } @@ -815,6 +816,7 @@ static void clusterip_net_exit(struct net *net) #ifdef CONFIG_PROC_FS struct clusterip_net *cn = net_generic(net, clusterip_net_id); proc_remove(cn->procdir); + cn->procdir = NULL; #endif nf_unregister_net_hook(net, &cip_arp_ops); } From f7fb77fc12352d15180dc3c08ffba10573d5167d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Jul 2017 20:03:05 +0200 Subject: [PATCH 2/5] netfilter: nft_compat: check extension hook mask only if set If the x_tables extension comes with no hook mask, skip this validation. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f5a7cb68694e..b89f4f65b2a0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -305,7 +305,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (!(hook_mask & target->hooks)) + if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; ret = nft_compat_chain_validate_dependency(target->table, @@ -484,7 +484,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (!(hook_mask & match->hooks)) + if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; ret = nft_compat_chain_validate_dependency(match->table, From 9beceb54fa2c0b47532dd2b07f37e410641cf9b2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Jul 2017 11:22:04 +0900 Subject: [PATCH 3/5] netfilter: x_tables: Fix use-after-free in ipt_do_table. If verdict is NF_STOLEN in the SYNPROXY target, the skb is consumed. However, ipt_do_table() always tries to get ip header from the skb. So that, KASAN triggers the use-after-free message. We can reproduce this message using below command. # iptables -I INPUT -p tcp -j SYNPROXY --mss 1460 [ 193.542265] BUG: KASAN: use-after-free in ipt_do_table+0x1405/0x1c10 [ ... ] [ 193.578603] Call Trace: [ 193.581590] [ 193.584107] dump_stack+0x68/0xa0 [ 193.588168] print_address_description+0x78/0x290 [ 193.593828] ? ipt_do_table+0x1405/0x1c10 [ 193.598690] kasan_report+0x230/0x340 [ 193.603194] __asan_report_load2_noabort+0x19/0x20 [ 193.608950] ipt_do_table+0x1405/0x1c10 [ 193.613591] ? rcu_read_lock_held+0xae/0xd0 [ 193.618631] ? ip_route_input_rcu+0x27d7/0x4270 [ 193.624348] ? ipt_do_table+0xb68/0x1c10 [ 193.629124] ? do_add_counters+0x620/0x620 [ 193.634234] ? iptable_filter_net_init+0x60/0x60 [ ... ] After this patch, only when verdict is XT_CONTINUE, ipt_do_table() tries to get ip header. Also arpt_do_table() is modified because it has same bug. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 10 +++++----- net/ipv4/netfilter/ip_tables.c | 9 +++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0bc3c3d73e61..9e9d9afd18f7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -268,14 +268,14 @@ unsigned int arpt_do_table(struct sk_buff *skb, acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); - /* Target might have changed stuff. */ - arp = arp_hdr(skb); - - if (verdict == XT_CONTINUE) + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + arp = arp_hdr(skb); e = arpt_next_entry(e); - else + } else { /* Verdict */ break; + } } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2a55a40211cb..622ed2887cd5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -352,13 +352,14 @@ ipt_do_table(struct sk_buff *skb, acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); - /* Target might have changed stuff. */ - ip = ip_hdr(skb); - if (verdict == XT_CONTINUE) + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + ip = ip_hdr(skb); e = ipt_next_entry(e); - else + } else { /* Verdict */ break; + } } while (!acpar.hotdrop); xt_write_recseq_end(addend); From ab6dd1beac7be3c17f8bf3d38bdf29ecb7293f1e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 10 Aug 2017 10:22:24 +0800 Subject: [PATCH 4/5] netfilter: check for seqadj ext existence before adding it in nf_nat_setup_info Commit 4440a2ab3b9f ("netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions") wanted to drop the packet when it fails to add seqadj ext due to no memory by checking if nfct_seqadj_ext_add returns NULL. But that nfct_seqadj_ext_add returns NULL can also happen when seqadj ext already exists in a nf_conn. It will cause that userspace protocol doesn't work when both dnat and snat are configured. Li Shuang found this issue in the case: Topo: ftp client router ftp server 10.167.131.2 <-> 10.167.131.254 10.167.141.254 <-> 10.167.141.1 Rules: # iptables -t nat -A PREROUTING -i eth1 -p tcp -m tcp --dport 21 -j \ DNAT --to-destination 10.167.141.1 # iptables -t nat -A POSTROUTING -o eth2 -p tcp -m tcp --dport 21 -j \ SNAT --to-source 10.167.141.254 In router, when both dnat and snat are added, nf_nat_setup_info will be called twice. The packet can be dropped at the 2nd time for DNAT due to seqadj ext is already added at the 1st time for SNAT. This patch is to fix it by checking for seqadj ext existence before adding it, so that the packet will not be dropped if seqadj ext already exists. Note that as Florian mentioned, as a long term, we should review ext_add() behaviour, it's better to return a pointer to the existing ext instead. Fixes: 4440a2ab3b9f ("netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions") Reported-by: Li Shuang Acked-by: Florian Westphal Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index eb541786ccb7..b1d3740ae36a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -441,7 +441,7 @@ nf_nat_setup_info(struct nf_conn *ct, else ct->status |= IPS_DST_NAT; - if (nfct_help(ct)) + if (nfct_help(ct) && !nfct_seqadj(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } From c26844eda9d4fdbd266660e3b3de2d0270e3a1ed Mon Sep 17 00:00:00 2001 From: andy zhou Date: Mon, 21 Aug 2017 12:38:53 -0700 Subject: [PATCH 5/5] netfilter: nf_tables: Fix nft limit burst handling Current implementation treats the burst configuration the same as rate configuration. This can cause the per packet cost to be lower than configured. In effect, this bug causes the token bucket to be refilled at a higher rate than what user has specified. This patch changes the implementation so that the token bucket size is controlled by "rate + burst", while maintain the token bucket refill rate the same as user specified. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Andy Zhou Acked-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 18dd57a52651..14538b1d4d11 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -65,19 +65,23 @@ static int nft_limit_init(struct nft_limit *limit, limit->nsecs = unit * NSEC_PER_SEC; if (limit->rate == 0 || limit->nsecs < unit) return -EOVERFLOW; - limit->tokens = limit->tokens_max = limit->nsecs; - - if (tb[NFTA_LIMIT_BURST]) { - u64 rate; + if (tb[NFTA_LIMIT_BURST]) limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + else + limit->burst = 0; - rate = limit->rate + limit->burst; - if (rate < limit->rate) - return -EOVERFLOW; + if (limit->rate + limit->burst < limit->rate) + return -EOVERFLOW; + + /* The token bucket size limits the number of tokens can be + * accumulated. tokens_max specifies the bucket size. + * tokens_max = unit * (rate + burst) / rate. + */ + limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + limit->rate); + limit->tokens_max = limit->tokens; - limit->rate = rate; - } if (tb[NFTA_LIMIT_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); @@ -95,9 +99,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, { u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0; u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); - u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate), + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate), NFTA_LIMIT_PAD) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), NFTA_LIMIT_PAD) ||