linux/kernel/bpf/dispatcher.c
Jiri Olsa 4121d4481b bpf: Synchronize dispatcher update with bpf_dispatcher_xdp_func
Hao Sun reported crash in dispatcher image [1].

Currently there is no synchronization between bpf_dispatcher_update and
bpf_dispatcher_xdp_func, so the following race is possible:

 cpu 0:                               cpu 1:

 bpf_prog_run_xdp
   ...
   bpf_dispatcher_xdp_func
     in image at offset 0x0

                                      bpf_dispatcher_update
                                        update image at offset 0x800
                                      bpf_dispatcher_update
                                        update image at offset 0x0

     in image at offset 0x0 -> crash

Fix this by synchronizing the dispatcher image update (which is done
in the bpf_dispatcher_update function) with bpf_dispatcher_xdp_func, which
reads and executes the dispatcher image.

Calling synchronize_rcu after updating and installing new image ensures
that readers leave old image before it's changed in the next dispatcher
update. The update itself is locked with dispatcher's mutex.

The bpf_prog_run_xdp is called under local_bh_disable and synchronize_rcu
will wait for it to leave [2].

[1] https://lore.kernel.org/bpf/Y5SFho7ZYXr9ifRn@krava/T/#m00c29ece654bc9f332a17df493bbca33e702896c
[2] https://lore.kernel.org/bpf/0B62D35A-E695-4B7A-A0D4-774767544C1A@gmail.com/T/#mff43e2c003ae99f4a38f353c7969be4c7162e877

Reported-by: Hao Sun <sunhao.th@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20221214123542.1389719-1-jolsa@kernel.org
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2022-12-14 12:02:14 -08:00

174 lines
4.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2019 Intel Corporation. */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/static_call.h>
/* The BPF dispatcher is a multiway branch code generator. The
* dispatcher is a mechanism to avoid the performance penalty of an
* indirect call, which is expensive when retpolines are enabled. A
* dispatch client registers a BPF program into the dispatcher, and if
* there is available room in the dispatcher a direct call to the BPF
* program will be generated. All calls to the BPF programs called via
* the dispatcher will then be a direct call, instead of an
* indirect. The dispatcher hijacks a trampoline function via the
* __fentry__ of the trampoline. The trampoline function has the
* following signature:
*
* unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi,
* unsigned int (*bpf_func)(const void *,
* const struct bpf_insn *));
*/
static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog(
struct bpf_dispatcher *d, struct bpf_prog *prog)
{
int i;
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
if (prog == d->progs[i].prog)
return &d->progs[i];
}
return NULL;
}
/* Return the first slot of @d that has no program attached (->prog is
 * NULL), or NULL when every slot is occupied.
 */
static struct bpf_dispatcher_prog *bpf_dispatcher_find_free(
	struct bpf_dispatcher *d)
{
	/* An empty slot is simply one whose program pointer is NULL. */
	struct bpf_prog *unused = NULL;

	return bpf_dispatcher_find_prog(d, unused);
}
static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
struct bpf_prog *prog)
{
struct bpf_dispatcher_prog *entry;
if (!prog)
return false;
entry = bpf_dispatcher_find_prog(d, prog);
if (entry) {
refcount_inc(&entry->users);
return false;
}
entry = bpf_dispatcher_find_free(d);
if (!entry)
return false;
bpf_prog_inc(prog);
entry->prog = prog;
refcount_set(&entry->users, 1);
d->num_progs++;
return true;
}
/* Drop one user of @prog from dispatcher @d.
 *
 * When the last user goes away the slot is cleared, the program
 * reference held by the slot is released and d->num_progs shrinks by
 * one.
 *
 * Returns true only when the slot was actually emptied; false when
 * @prog is NULL, is not registered, or still has other users.
 */
static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
				       struct bpf_prog *prog)
{
	struct bpf_dispatcher_prog *slot;

	if (prog == NULL)
		return false;

	slot = bpf_dispatcher_find_prog(d, prog);
	/* The user count is only touched when a slot was found. */
	if (slot == NULL || !refcount_dec_and_test(&slot->users))
		return false;

	slot->prog = NULL;
	bpf_prog_put(prog);
	d->num_progs -= 1;
	return true;
}
/* Weak default for the architecture hook that emits the dispatcher
 * code. This fallback ignores all of its arguments and unconditionally
 * reports -ENOTSUPP.
 */
int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs)
{
	const int err = -ENOTSUPP;

	return err;
}
static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf)
{
s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
int i;
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
if (d->progs[i].prog)
*ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func;
}
return arch_prepare_bpf_dispatcher(image, buf, &ips[0], d->num_progs);
}
/* Regenerate the dispatcher code for the current program set and switch
 * callers over to it.
 *
 * The image is used as two halves of PAGE_SIZE / 2 bytes each. When
 * programs were installed before this change (@prev_num_progs != 0) the
 * new code goes into the half that is not the current one
 * (d->image_off flipped); otherwise offset 0 is used. With no programs
 * left, callers are pointed at bpf_dispatcher_nop_func instead.
 *
 * If generating or copying the new code fails, the function returns
 * early without switching callers and without updating d->image_off.
 */
static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
{
	void *target = NULL, *staging = NULL;
	u32 next_off = 0;

	if (prev_num_progs)
		next_off = d->image_off ^ (PAGE_SIZE / 2);

	if (d->num_progs) {
		target = d->image + next_off;
		staging = d->rw_image + next_off;
	}

	/* Build the dispatcher in the writable staging buffer (d->rw_image),
	 * then use bpf_arch_text_copy to place it into d->image, which is
	 * RO+X. Either step failing aborts the update.
	 */
	if (target &&
	    (bpf_dispatcher_prepare(d, target, staging) ||
	     IS_ERR(bpf_arch_text_copy(target, staging, PAGE_SIZE / 2))))
		return;

	__BPF_DISPATCHER_UPDATE(d, target ?: (void *)&bpf_dispatcher_nop_func);

	/* Wait until every caller still running in the previous/old half of
	 * the image has left it, so that the next update may safely
	 * overwrite that half.
	 */
	synchronize_rcu();

	if (target)
		d->image_off = next_off;
}
/* Replace program @from with program @to in dispatcher @d.
 *
 * Either pointer may be NULL, in which case the corresponding remove or
 * add step does nothing. All work happens under d->mutex. On first use
 * the executable image and its writable staging buffer (PAGE_SIZE each)
 * are allocated; if either allocation fails the function returns
 * without touching the program set. The dispatcher code is regenerated
 * only when the remove or the add actually changed the set of slots.
 */
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
				struct bpf_prog *to)
{
	bool removed, added;
	int old_count;

	if (from == to)
		return;

	mutex_lock(&d->mutex);

	if (!d->image) {
		d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero);
		if (!d->image)
			goto unlock;

		d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE);
		if (!d->rw_image) {
			u32 size = PAGE_SIZE;

			/* Store the allocation size at the start of the image
			 * before freeing it — presumably so that
			 * bpf_prog_pack_free() can read it from the header it
			 * is handed; confirm against struct bpf_binary_header.
			 * NOTE(review): the result of this copy is not checked.
			 */
			bpf_arch_text_copy(d->image, &size, sizeof(size));
			bpf_prog_pack_free((struct bpf_binary_header *)d->image);
			d->image = NULL;
			goto unlock;
		}

		bpf_image_ksym_add(d->image, &d->ksym);
	}

	/* Snapshot the count first: the update needs to know whether any
	 * program was installed before this change.
	 */
	old_count = d->num_progs;

	/* Remove before add; both steps always run. */
	removed = bpf_dispatcher_remove_prog(d, from);
	added = bpf_dispatcher_add_prog(d, to);

	if (removed || added)
		bpf_dispatcher_update(d, old_count);

unlock:
	mutex_unlock(&d->mutex);
}