bpf-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZmAYPgAKCRDbK58LschI
 g2XdAP9M8zYLRw4IG8DUFug7F+oqRPqgbs+Gvsf9YNl5/PSiTQEA6WKa/ObaG/W9
 vre9VxhMWKgcMfzqZyztNHAiDm8R+QI=
 =l7gV
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-06-05

We've added 8 non-merge commits during the last 6 day(s) which contain
a total of 9 files changed, 34 insertions(+), 35 deletions(-).

The main changes are:

1) Fix a potential use-after-free in bpf_link_free when the link uses
   dealloc_deferred to free the link object but later still tests for
   presence of link->ops->dealloc, from Cong Wang.

2) Fix BPF test infra to set the run context for rawtp test_run callback
   where syzbot reported a crash, from Jiri Olsa.

3) Fix bpf_session_cookie BTF_ID in the special_kfunc_set list to exclude
   it for the case of !CONFIG_FPROBE, also from Jiri Olsa.

4) Fix a Coverity static analysis report to not close() a link_fd of -1
   in the multi-uprobe feature detector, from Andrii Nakryiko.

5) Revert support for redirect to any xsk socket bound to the same umem
   as it can result in corrupted ring state which can lead to a crash when
   flushing rings. A different approach will be pursued for bpf-next to
   address it safely, from Magnus Karlsson.

6) Fix inet_csk_accept prototype in test_sk_storage_tracing.c which caused
   BPF CI failure after the last tree fast forwarding, from Andrii Nakryiko.

7) Fix a coccicheck warning in BPF devmap that iterator variable cannot
   be NULL, from Thorsten Blum.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  Revert "xsk: Document ability to redirect to any socket bound to the same umem"
  Revert "xsk: Support redirect to any socket bound to the same umem"
  bpf: Set run context for rawtp test_run callback
  bpf: Fix a potential use-after-free in bpf_link_free()
  bpf, devmap: Remove unnecessary if check in for loop
  libbpf: don't close(-1) in multi-uprobe feature detector
  bpf: Fix bpf_session_cookie BTF_ID in special_kfunc_set list
  selftests/bpf: fix inet_csk_accept prototype in test_sk_storage_tracing.c
====================

Link: https://lore.kernel.org/r/20240605091525.22628-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-06-05 19:03:07 -07:00
commit 886bf9172d
9 changed files with 33 additions and 34 deletions

View file

@ -329,24 +329,23 @@ XDP_SHARED_UMEM option and provide the initial socket's fd in the
sxdp_shared_umem_fd field as you registered the UMEM on that
socket. These two sockets will now share one and the same UMEM.
In this case, it is possible to use the NIC's packet steering
capabilities to steer the packets to the right queue. This is not
possible in the previous example as there is only one queue shared
among sockets, so the NIC cannot do this steering as it can only steer
between queues.
There is no need to supply an XDP program like the one in the previous
case where sockets were bound to the same queue id and
device. Instead, use the NIC's packet steering capabilities to steer
the packets to the right queue. In the previous example, there is only
one queue shared among sockets, so the NIC cannot do this steering. It
can only steer between queues.
In libxdp (or libbpf prior to version 1.0), you need to use the
xsk_socket__create_shared() API as it takes a reference to a FILL ring
and a COMPLETION ring that will be created for you and bound to the
shared UMEM. You can use this function for all the sockets you create,
or you can use it for the second and following ones and use
xsk_socket__create() for the first one. Both methods yield the same
result.
In libbpf, you need to use the xsk_socket__create_shared() API as it
takes a reference to a FILL ring and a COMPLETION ring that will be
created for you and bound to the shared UMEM. You can use this
function for all the sockets you create, or you can use it for the
second and following ones and use xsk_socket__create() for the first
one. Both methods yield the same result.
Note that a UMEM can be shared between sockets on the same queue id
and device, as well as between queues on the same device and between
devices at the same time. It is also possible to redirect to any
socket as long as it is bound to the same umem with XDP_SHARED_UMEM.
devices at the same time.
XDP_USE_NEED_WAKEUP bind flag
-----------------------------
@ -823,10 +822,6 @@ A: The short answer is no, that is not supported at the moment. The
switch, or other distribution mechanism, in your NIC to direct
traffic to the correct queue id and socket.
Note that if you are using the XDP_SHARED_UMEM option, it is
possible to switch traffic between any socket bound to the same
umem.
Q: My packets are sometimes corrupted. What is wrong?
A: Care has to be taken not to feed the same buffer in the UMEM into

View file

@ -760,9 +760,6 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
if (!dst)
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded,
dst->dev->ifindex))
continue;

View file

@ -2998,6 +2998,7 @@ static int bpf_obj_get(const union bpf_attr *attr)
void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog)
{
WARN_ON(ops->dealloc && ops->dealloc_deferred);
atomic64_set(&link->refcnt, 1);
link->type = type;
link->id = 0;
@ -3056,16 +3057,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
const struct bpf_link_ops *ops = link->ops;
bool sleepable = false;
bpf_link_free_id(link->id);
if (link->prog) {
sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
ops->release(link);
bpf_prog_put(link->prog);
}
if (link->ops->dealloc_deferred) {
if (ops->dealloc_deferred) {
/* schedule BPF link deallocation; if underlying BPF program
* is sleepable, we need to first wait for RCU tasks trace
* sync, then go through "classic" RCU grace period
@ -3074,9 +3076,8 @@ static void bpf_link_free(struct bpf_link *link)
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
}
if (link->ops->dealloc)
link->ops->dealloc(link);
} else if (ops->dealloc)
ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)

View file

@ -11128,7 +11128,11 @@ BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{

View file

@ -3517,7 +3517,6 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
}
#endif /* CONFIG_UPROBES */
#ifdef CONFIG_FPROBE
__bpf_kfunc_start_defs();
__bpf_kfunc bool bpf_session_is_return(void)
@ -3566,4 +3565,3 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
}
late_initcall(bpf_kprobe_multi_kfuncs_init);
#endif

View file

@ -727,10 +727,16 @@ static void
__bpf_prog_test_run_raw_tp(void *data)
{
struct bpf_raw_tp_test_run_info *info = data;
struct bpf_trace_run_ctx run_ctx = {};
struct bpf_run_ctx *old_run_ctx;
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
rcu_read_lock();
info->retval = bpf_prog_run(info->prog, info->ctx);
rcu_read_unlock();
bpf_reset_run_ctx(old_run_ctx);
}
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,

View file

@ -313,13 +313,10 @@ static bool xsk_is_bound(struct xdp_sock *xs)
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
struct net_device *dev = xdp->rxq->dev;
u32 qid = xdp->rxq->queue_index;
if (!xsk_is_bound(xs))
return -ENXIO;
if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem)
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {

View file

@ -393,7 +393,8 @@ static int probe_uprobe_multi_link(int token_fd)
err = -errno; /* close() can clobber errno */
if (link_fd >= 0 || err != -EBADF) {
close(link_fd);
if (link_fd >= 0)
close(link_fd);
close(prog_fd);
return 0;
}

View file

@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk)
}
SEC("fexit/inet_csk_accept")
int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg,
struct sock *accepted_sk)
{
set_task_info(accepted_sk);