linux/net/netfilter/nf_bpf_link.c
D. Wythe 1834d62ae8 netfilter: bpf: fix bad registration on nf_defrag
We should pass a pointer to the global hook to get_proto_defrag_hook()
instead of its value, since a value passed by copy is not updated even
after the requested module has been loaded successfully.

Log:

[   54.915713] nf_defrag_ipv4 has bad registration
[   54.915779] WARNING: CPU: 3 PID: 6323 at net/netfilter/nf_bpf_link.c:62 get_proto_defrag_hook+0x137/0x160
[   54.915835] CPU: 3 PID: 6323 Comm: fentry Kdump: loaded Tainted: G            E      6.7.0-rc2+ #35
[   54.915839] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
[   54.915841] RIP: 0010:get_proto_defrag_hook+0x137/0x160
[   54.915844] Code: 4f 8c e8 2c cf 68 ff 80 3d db 83 9a 01 00 0f 85 74 ff ff ff 48 89 ee 48 c7 c7 8f 12 4f 8c c6 05 c4 83 9a 01 01 e8 09 ee 5f ff <0f> 0b e9 57 ff ff ff 49 8b 3c 24 4c 63 e5 e8 36 28 6c ff 4c 89 e0
[   54.915849] RSP: 0018:ffffb676003fbdb0 EFLAGS: 00010286
[   54.915852] RAX: 0000000000000023 RBX: ffff9596503d5600 RCX: ffff95996fce08c8
[   54.915854] RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff95996fce08c0
[   54.915855] RBP: ffffffff8c4f12de R08: 0000000000000000 R09: 00000000fffeffff
[   54.915859] R10: ffffb676003fbc70 R11: ffffffff8d363ae8 R12: 0000000000000000
[   54.915861] R13: ffffffff8e1f75c0 R14: ffffb676003c9000 R15: 00007ffd15e78ef0
[   54.915864] FS:  00007fb6e9cab740(0000) GS:ffff95996fcc0000(0000) knlGS:0000000000000000
[   54.915867] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   54.915868] CR2: 00007ffd15e75c40 CR3: 0000000101e62006 CR4: 0000000000360ef0
[   54.915870] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   54.915871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   54.915873] Call Trace:
[   54.915891]  <TASK>
[   54.915894]  ? __warn+0x84/0x140
[   54.915905]  ? get_proto_defrag_hook+0x137/0x160
[   54.915908]  ? __report_bug+0xea/0x100
[   54.915925]  ? report_bug+0x2b/0x80
[   54.915928]  ? handle_bug+0x3c/0x70
[   54.915939]  ? exc_invalid_op+0x18/0x70
[   54.915942]  ? asm_exc_invalid_op+0x1a/0x20
[   54.915948]  ? get_proto_defrag_hook+0x137/0x160
[   54.915950]  bpf_nf_link_attach+0x1eb/0x240
[   54.915953]  link_create+0x173/0x290
[   54.915969]  __sys_bpf+0x588/0x8f0
[   54.915974]  __x64_sys_bpf+0x20/0x30
[   54.915977]  do_syscall_64+0x45/0xf0
[   54.915989]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
[   54.915998] RIP: 0033:0x7fb6e9daa51d
[   54.916001] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 2b 89 0c 00 f7 d8 64 89 01 48
[   54.916003] RSP: 002b:00007ffd15e78ed8 EFLAGS: 00000246 ORIG_RAX: 0000000000000141
[   54.916006] RAX: ffffffffffffffda RBX: 00007ffd15e78fc0 RCX: 00007fb6e9daa51d
[   54.916007] RDX: 0000000000000040 RSI: 00007ffd15e78ef0 RDI: 000000000000001c
[   54.916009] RBP: 000000000000002d R08: 00007fb6e9e73a60 R09: 0000000000000001
[   54.916010] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000006
[   54.916012] R13: 0000000000000006 R14: 0000000000000000 R15: 0000000000000000
[   54.916014]  </TASK>
[   54.916015] ---[ end trace 0000000000000000 ]---

Fixes: 91721c2d02 ("netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link")
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Acked-by: Daniel Xu <dxu@dxuuu.xyz>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-12-06 17:14:26 +01:00

324 lines
7.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_bpf_link.h>
#include <uapi/linux/netfilter_ipv4.h>
static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
const struct nf_hook_state *s)
{
const struct bpf_prog *prog = bpf_prog;
struct bpf_nf_ctx ctx = {
.state = s,
.skb = skb,
};
return bpf_prog_run(prog, &ctx);
}
/*
 * Per-link state for a BPF program attached to a netfilter hook.
 * Callbacks receive the embedded bpf_link and recover this structure
 * with container_of().
 */
struct bpf_nf_link {
/* Generic BPF link; must be embedded so container_of() can find us. */
struct bpf_link link;
/* Hook descriptor passed to nf_register_net_hook()/nf_unregister_net_hook(). */
struct nf_hook_ops hook_ops;
/* Network namespace the hook is registered in. */
struct net *net;
/* Set to 1 via cmpxchg() once the link has been torn down (release/detach). */
u32 dead;
/* Defrag hook enabled for this link, or NULL when defrag was not requested. */
const struct nf_defrag_hook *defrag_hook;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
/*
 * Look up, pin and enable a protocol defrag hook for @link.
 *
 * @link:            link whose network namespace defrag is enabled in
 * @ptr_global_hook: address of the RCU-protected global hook pointer; it is
 *                   re-read after a module load, hence a pointer to it
 * @mod:             name of the module to request when no hook is registered
 *
 * On success the hook is returned with a reference held on its owning
 * module and ->enable() already called; the caller must undo both.
 * On failure an ERR_PTR() is returned:
 *   - the request_module() error (or -EINVAL for a positive return),
 *   - -ENOENT when no hook is registered or the module ref cannot be taken,
 *   - the error returned by ->enable().
 */
static const struct nf_defrag_hook *
get_proto_defrag_hook(struct bpf_nf_link *link,
		      const struct nf_defrag_hook __rcu **ptr_global_hook,
		      const char *mod)
{
	const struct nf_defrag_hook *defrag;
	int ret;

	/* RCU keeps the hook from vanishing under us on module unload */
	rcu_read_lock();
	defrag = rcu_dereference(*ptr_global_hook);
	if (defrag == NULL) {
		/*
		 * Nothing registered yet: leave the RCU section, ask for the
		 * module, then re-enter and look at the global pointer again.
		 */
		rcu_read_unlock();
		ret = request_module(mod);
		if (ret != 0)
			return ERR_PTR(ret < 0 ? ret : -EINVAL);
		rcu_read_lock();
		defrag = rcu_dereference(*ptr_global_hook);
	}

	if (defrag == NULL || !try_module_get(defrag->owner)) {
		/* Module loaded but still no hook: registration is broken */
		WARN_ONCE(!defrag, "%s has bad registration", mod);
		rcu_read_unlock();
		return ERR_PTR(-ENOENT);
	}

	/* The module reference now pins the hook; RCU is no longer needed */
	defrag = rcu_pointer_handoff(defrag);
	rcu_read_unlock();

	ret = defrag->enable(link->net);
	if (ret == 0)
		return defrag;

	/* Enabling failed: give the module reference back */
	module_put(defrag->owner);
	return ERR_PTR(ret);
}
#endif
static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
{
const struct nf_defrag_hook __maybe_unused *hook;
switch (link->hook_ops.pf) {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
case NFPROTO_IPV4:
hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4");
if (IS_ERR(hook))
return PTR_ERR(hook);
link->defrag_hook = hook;
return 0;
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
case NFPROTO_IPV6:
hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6");
if (IS_ERR(hook))
return PTR_ERR(hook);
link->defrag_hook = hook;
return 0;
#endif
default:
return -EAFNOSUPPORT;
}
}
/*
 * Undo bpf_nf_enable_defrag(): call the hook's ->disable() for the link's
 * network namespace and drop the module reference.  Does nothing when the
 * link has no defrag hook.
 */
static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
{
	const struct nf_defrag_hook *defrag = link->defrag_hook;

	if (defrag) {
		defrag->disable(link->net);
		module_put(defrag->owner);
	}
}
/*
 * Tear down a netfilter BPF link: unregister the hook and disable defrag.
 *
 * Also reached through .detach, so the teardown must happen only once.
 * The plain read of ->dead is an early exit; the cmpxchg() picks the single
 * caller that flips it from 0 to 1 and performs the teardown.
 */
static void bpf_nf_link_release(struct bpf_link *link)
{
	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);

	/* do not double release in case .detach was already called */
	if (nf_link->dead || cmpxchg(&nf_link->dead, 0, 1))
		return;

	nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
	bpf_nf_disable_defrag(nf_link);
}
/* Free the bpf_nf_link that embeds @link. */
static void bpf_nf_link_dealloc(struct bpf_link *link)
{
	kfree(container_of(link, struct bpf_nf_link, link));
}
/*
 * .detach handler: performs the same teardown as .release.
 * Always returns 0.
 */
static int bpf_nf_link_detach(struct bpf_link *link)
{
	bpf_nf_link_release(link);

	return 0;
}
static void bpf_nf_link_show_info(const struct bpf_link *link,
struct seq_file *seq)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n",
nf_link->hook_ops.pf, nf_link->hook_ops.hooknum,
nf_link->hook_ops.priority);
}
static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
info->netfilter.pf = nf_link->hook_ops.pf;
info->netfilter.hooknum = nf_link->hook_ops.hooknum;
info->netfilter.priority = nf_link->hook_ops.priority;
info->netfilter.flags = 0;
return 0;
}
/*
 * .update_prog handler: swapping the program of a netfilter link is not
 * supported; always returns -EOPNOTSUPP.
 */
static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
			      struct bpf_prog *old_prog)
{
	return -EOPNOTSUPP;
}
/* Link operations installed on every netfilter BPF link by bpf_link_init(). */
static const struct bpf_link_ops bpf_nf_link_lops = {
	/* lifetime */
	.detach		= bpf_nf_link_detach,
	.release	= bpf_nf_link_release,
	.dealloc	= bpf_nf_link_dealloc,
	/* program replacement (unsupported) */
	.update_prog	= bpf_nf_link_update,
	/* introspection */
	.fill_link_info	= bpf_nf_link_fill_link_info,
	.show_fdinfo	= bpf_nf_link_show_info,
};
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
int prio;
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS)
return -EPROTO;
break;
default:
return -EAFNOSUPPORT;
}
if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;
/* make sure conntrack confirm is always last */
prio = attr->link_create.netfilter.priority;
if (prio == NF_IP_PRI_FIRST)
return -ERANGE; /* sabotage_in and other warts */
else if (prio == NF_IP_PRI_LAST)
return -ERANGE; /* e.g. conntrack confirm */
else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
return 0;
}
/*
 * Create a netfilter BPF link that runs @prog at the hook described by
 * @attr->link_create.netfilter, in the calling task's network namespace.
 *
 * Steps: validate the attributes, allocate and initialise the link, prime
 * it, optionally enable defrag, register the netfilter hook, then settle
 * the link.  Returns the result of bpf_link_settle() on success or a
 * negative errno on failure.
 *
 * Before bpf_link_prime() succeeds the link is freed directly with kfree();
 * after that point failures are unwound through bpf_link_cleanup().
 */
int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct net *net = current->nsproxy->net_ns;
	struct bpf_link_primer link_primer;
	struct bpf_nf_link *link;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	err = bpf_nf_check_pf_and_hooks(attr);
	if (err)
		return err;

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);

	/* Link bookkeeping */
	link->net = net;
	link->dead = false;
	link->defrag_hook = NULL;

	/* Hook descriptor: where, and at what priority, the program runs */
	link->hook_ops.pf = attr->link_create.netfilter.pf;
	link->hook_ops.hooknum = attr->link_create.netfilter.hooknum;
	link->hook_ops.priority = attr->link_create.netfilter.priority;
	link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
	link->hook_ops.hook = nf_hook_run_bpf;
	link->hook_ops.priv = prog;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
		err = bpf_nf_enable_defrag(link);
		if (err)
			goto err_cleanup;
	}

	err = nf_register_net_hook(net, &link->hook_ops);
	if (err)
		goto err_disable_defrag;

	return bpf_link_settle(&link_primer);

err_disable_defrag:
	bpf_nf_disable_defrag(link);
err_cleanup:
	bpf_link_cleanup(&link_primer);
	return err;
}
/* Program ops for netfilter BPF programs; only the test-run entry is set. */
const struct bpf_prog_ops netfilter_prog_ops = {
.test_run = bpf_prog_test_run_nf,
};
/*
 * Mark a context access as yielding a trusted pointer to the vmlinux BTF
 * struct called @name.
 *
 * Fills @info with the vmlinux BTF object, the struct's type id and the
 * register type PTR_TO_BTF_ID | PTR_TRUSTED.  Returns false, leaving @info
 * untouched, when vmlinux BTF is unavailable or the struct cannot be found
 * (the latter also triggers a one-time warning).
 */
static bool nf_ptr_to_btf_id(struct bpf_insn_access_aux *info, const char *name)
{
	struct btf *vmlinux_btf = bpf_get_btf_vmlinux();
	s32 id;

	if (IS_ERR_OR_NULL(vmlinux_btf))
		return false;

	id = btf_find_by_name_kind(vmlinux_btf, name, BTF_KIND_STRUCT);
	if (WARN_ON_ONCE(id < 0))
		return false;

	info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED;
	info->btf_id = id;
	info->btf = vmlinux_btf;

	return true;
}
static bool nf_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct bpf_nf_ctx))
return false;
if (type == BPF_WRITE)
return false;
switch (off) {
case bpf_ctx_range(struct bpf_nf_ctx, skb):
if (size != sizeof_field(struct bpf_nf_ctx, skb))
return false;
return nf_ptr_to_btf_id(info, "sk_buff");
case bpf_ctx_range(struct bpf_nf_ctx, state):
if (size != sizeof_field(struct bpf_nf_ctx, state))
return false;
return nf_ptr_to_btf_id(info, "nf_hook_state");
default:
return false;
}
return false;
}
/*
 * Verifier callback: resolve helper @func_id for a netfilter program.
 * Defers entirely to bpf_base_func_proto(); @prog is not consulted.
 */
static const struct bpf_func_proto *
bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id);
}
/* Verifier ops for netfilter BPF programs: helper lookup and ctx access rules. */
const struct bpf_verifier_ops netfilter_verifier_ops = {
	.get_func_proto		= bpf_nf_func_proto,
	.is_valid_access	= nf_is_valid_access,
};