Merge branch 'fix-bpf-multi-uprobe-pid-filtering-logic'

Andrii Nakryiko says:

====================
Fix BPF multi-uprobe PID filtering logic

It turns out that current implementation of multi-uprobe PID filtering logic
is broken. It filters by thread, while the promise is filtering by process.
Patch #1 fixes the logic trivially. The rest is testing and mitigations that
are necessary for libbpf to not break users of USDT programs.

v1->v2:
  - fix selftest in last patch (CI);
  - use semicolon in patch #3 (Jiri).
====================

Link: https://lore.kernel.org/r/20240521163401.3005045-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2024-05-25 10:46:03 -07:00
commit 590016ad83
4 changed files with 206 additions and 19 deletions

View file

@ -3295,7 +3295,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
struct bpf_run_ctx *old_run_ctx;
int err = 0;
if (link->task && current != link->task)
if (link->task && current->mm != link->task->mm)
return 0;
if (sleepable)
@ -3396,8 +3396,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
cnt = attr->link_create.uprobe_multi.cnt;
pid = attr->link_create.uprobe_multi.pid;
if (!upath || !uoffsets || !cnt)
if (!upath || !uoffsets || !cnt || pid < 0)
return -EINVAL;
if (cnt > MAX_UPROBE_MULTI_CNT)
return -E2BIG;
@ -3421,11 +3422,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error_path_put;
}
pid = attr->link_create.uprobe_multi.pid;
if (pid) {
rcu_read_lock();
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
rcu_read_unlock();
task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
if (!task) {
err = -ESRCH;
goto error_path_put;

View file

@ -392,11 +392,40 @@ static int probe_uprobe_multi_link(int token_fd)
link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
err = -errno; /* close() can clobber errno */
if (link_fd >= 0 || err != -EBADF) {
close(link_fd);
close(prog_fd);
return 0;
}
/* Initial multi-uprobe support in kernel didn't handle PID filtering
* correctly (it was doing thread filtering, not process filtering).
* So now we'll detect if PID filtering logic was fixed, and, if not,
* we'll pretend multi-uprobes are not supported, if not.
* Multi-uprobes are used in USDT attachment logic, and we need to be
* conservative here, because multi-uprobe selection happens early at
* load time, while the use of PID filtering is known late at
* attachment time, at which point it's too late to undo multi-uprobe
* selection.
*
* Creating uprobe with pid == -1 for (invalid) '/' binary will fail
* early with -EINVAL on kernels with fixed PID filtering logic;
* otherwise -ESRCH would be returned if passed correct binary path
* (but we'll just get -BADF, of course).
*/
link_opts.uprobe_multi.pid = -1; /* invalid PID */
link_opts.uprobe_multi.path = "/"; /* invalid path */
link_opts.uprobe_multi.offsets = &offset;
link_opts.uprobe_multi.cnt = 1;
link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
err = -errno; /* close() can clobber errno */
if (link_fd >= 0)
close(link_fd);
close(prog_fd);
return link_fd < 0 && err == -EBADF;
return link_fd < 0 && err == -EINVAL;
}
static int probe_kern_bpf_cookie(int token_fd)

View file

@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>
#include <pthread.h>
#include <test_progs.h>
#include "uprobe_multi.skel.h"
#include "uprobe_multi_bench.skel.h"
#include "uprobe_multi_usdt.skel.h"
#include "bpf/libbpf_internal.h"
#include "testing_helpers.h"
#include "../sdt.h"
static char test_data[] = "test_data";
@ -25,9 +27,17 @@ noinline void uprobe_multi_func_3(void)
asm volatile ("");
}
noinline void usdt_trigger(void)
{
STAP_PROBE(test, pid_filter_usdt);
}
struct child {
int go[2];
int c2p[2]; /* child -> parent channel */
int pid;
int tid;
pthread_t thread;
};
static void release_child(struct child *child)
@ -38,6 +48,10 @@ static void release_child(struct child *child)
return;
close(child->go[1]);
close(child->go[0]);
if (child->thread)
pthread_join(child->thread, NULL);
close(child->c2p[0]);
close(child->c2p[1]);
if (child->pid > 0)
waitpid(child->pid, &child_status, 0);
}
@ -63,7 +77,7 @@ static struct child *spawn_child(void)
if (pipe(child.go))
return NULL;
child.pid = fork();
child.pid = child.tid = fork();
if (child.pid < 0) {
release_child(&child);
errno = EINVAL;
@ -82,6 +96,7 @@ static struct child *spawn_child(void)
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
usdt_trigger();
exit(errno);
}
@ -89,6 +104,67 @@ static struct child *spawn_child(void)
return &child;
}
static void *child_thread(void *ctx)
{
struct child *child = ctx;
int c = 0, err;
child->tid = syscall(SYS_gettid);
/* let parent know we are ready */
err = write(child->c2p[1], &c, 1);
if (err != 1)
pthread_exit(&err);
/* wait for parent's kick */
err = read(child->go[0], &c, 1);
if (err != 1)
pthread_exit(&err);
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
usdt_trigger();
err = 0;
pthread_exit(&err);
}
static struct child *spawn_thread(void)
{
static struct child child;
int c, err;
/* pipe to notify child to execute the trigger functions */
if (pipe(child.go))
return NULL;
/* pipe to notify parent that child thread is ready */
if (pipe(child.c2p)) {
close(child.go[0]);
close(child.go[1]);
return NULL;
}
child.pid = getpid();
err = pthread_create(&child.thread, NULL, child_thread, &child);
if (err) {
err = -errno;
close(child.go[0]);
close(child.go[1]);
close(child.c2p[0]);
close(child.c2p[1]);
errno = -err;
return NULL;
}
err = read(child.c2p[0], &c, 1);
if (!ASSERT_EQ(err, 1, "child_thread_ready"))
return NULL;
return &child;
}
static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
{
skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
@ -103,15 +179,23 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
* passed at the probe attach.
*/
skel->bss->pid = child ? 0 : getpid();
skel->bss->expect_pid = child ? child->pid : 0;
/* trigger all probes, if we are testing child *process*, just to make
* sure that PID filtering doesn't let through activations from wrong
* PIDs; when we test child *thread*, we don't want to do this to
* avoid double counting number of triggering events
*/
if (!child || !child->thread) {
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
usdt_trigger();
}
if (child)
kick_child(child);
/* trigger all probes */
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
/*
* There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
* function and each slepable probe (6) increments uprobe_multi_sleep_result.
@ -126,8 +210,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
if (child)
ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");
if (child) {
ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
}
}
static void test_skel_api(void)
@ -190,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul
if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
goto cleanup;
/* Attach (uprobe-backed) USDTs */
skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
"test", "pid_filter_usdt", NULL);
if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
goto cleanup;
skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
"test", "pid_filter_usdt", NULL);
if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
goto cleanup;
uprobe_multi_test_run(skel, child);
ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
if (child) {
ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
}
cleanup:
uprobe_multi__destroy(skel);
}
@ -210,6 +314,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
return;
__test_attach_api(binary, pattern, opts, child);
/* pid filter (thread) */
child = spawn_thread();
if (!ASSERT_OK_PTR(child, "spawn_thread"))
return;
__test_attach_api(binary, pattern, opts, child);
}
static void test_attach_api_pattern(void)
@ -397,7 +508,7 @@ static void test_attach_api_fails(void)
link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
if (!ASSERT_ERR(link_fd, "link_fd"))
goto cleanup;
ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
cleanup:
if (link_fd >= 0)
@ -495,6 +606,13 @@ static void test_link_api(void)
return;
__test_link_api(child);
/* pid filter (thread) */
child = spawn_thread();
if (!ASSERT_OK_PTR(child, "spawn_thread"))
return;
__test_link_api(child);
}
static void test_bench_attach_uprobe(void)

View file

@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
#include <bpf/usdt.bpf.h>
char _license[] SEC("license") = "GPL";
@ -22,6 +22,13 @@ __u64 uprobe_multi_sleep_result = 0;
int pid = 0;
int child_pid = 0;
int child_tid = 0;
int child_pid_usdt = 0;
int child_tid_usdt = 0;
int expect_pid = 0;
bool bad_pid_seen = false;
bool bad_pid_seen_usdt = false;
bool test_cookie = false;
void *user_ptr = 0;
@ -36,11 +43,19 @@ static __always_inline bool verify_sleepable_user_copy(void)
static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
child_pid = bpf_get_current_pid_tgid() >> 32;
__u64 cur_pid_tgid = bpf_get_current_pid_tgid();
__u32 cur_pid;
if (pid && child_pid != pid)
cur_pid = cur_pid_tgid >> 32;
if (pid && cur_pid != pid)
return;
if (expect_pid && cur_pid != expect_pid)
bad_pid_seen = true;
child_pid = cur_pid_tgid >> 32;
child_tid = (__u32)cur_pid_tgid;
__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
__u64 addr = bpf_get_func_ip(ctx);
@ -97,5 +112,32 @@ int uretprobe_sleep(struct pt_regs *ctx)
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
/* we need this one just to mix PID-filtered and global uprobes */
return 0;
}
SEC("usdt")
int usdt_pid(struct pt_regs *ctx)
{
__u64 cur_pid_tgid = bpf_get_current_pid_tgid();
__u32 cur_pid;
cur_pid = cur_pid_tgid >> 32;
if (pid && cur_pid != pid)
return 0;
if (expect_pid && cur_pid != expect_pid)
bad_pid_seen_usdt = true;
child_pid_usdt = cur_pid_tgid >> 32;
child_tid_usdt = (__u32)cur_pid_tgid;
return 0;
}
SEC("usdt")
int usdt_extra(struct pt_regs *ctx)
{
/* we need this one just to mix PID-filtered and global USDT probes */
return 0;
}