linux/kernel/bpf/map_in_map.c
Alexei Starovoitov 68134668c1 bpf: Add map side support for bpf timers.
Restrict bpf timers to array, hash (both preallocated and kmalloced), and
lru map types. The per-cpu maps with timers don't make sense, since 'struct
bpf_timer' is a part of map value. bpf timers in per-cpu maps would mean that
the number of timers depends on number of possible cpus and timers would not be
accessible from all cpus. lpm map support can be added in the future.
The timers in inner maps are supported.

The bpf_map_update/delete_elem() helpers and sys_bpf commands cancel and free
bpf_timer in a given map element.

Similar to 'struct bpf_spin_lock' BTF is required and it is used to validate
that map element indeed contains 'struct bpf_timer'.

Make check_and_init_map_value() init both bpf_spin_lock and bpf_timer when
map element data is reused in preallocated htab and lru maps.

Teach copy_map_value() to support both bpf_spin_lock and bpf_timer in a single
map element. There could be one of each, but not more than one. Due to 'one
bpf_timer in one element' restriction do not support timers in global data,
since global data is a map of single element, but from bpf program side it's
seen as many global variables and restriction of single global timer would be
odd. The sys_bpf map_freeze and sys_mmap syscalls are not allowed on maps with
timers, since user space could have corrupted mmap element and crashed the
kernel. The maps with timers cannot be readonly. Due to these restrictions
search for bpf_timer in datasec BTF in case it was placed in the global data to
report clear error.

The previous patch allowed 'struct bpf_timer' as a first field in a map
element only. Relax this restriction.

Refactor lru map to s/bpf_lru_push_free/htab_lru_push_free/ to cancel and free
the timer when lru map deletes an element as a part of it eviction algorithm.

Make sure that bpf program cannot access 'struct bpf_timer' via direct load/store.
The timer operation are done through helpers only.
This is similar to 'struct bpf_spin_lock'.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210715005417.78572-5-alexei.starovoitov@gmail.com
2021-07-15 22:31:10 +02:00

117 lines
2.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Facebook
*/
#include <linux/slab.h>
#include <linux/bpf.h>
#include "map_in_map.h"
struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
{
struct bpf_map *inner_map, *inner_map_meta;
u32 inner_map_meta_size;
struct fd f;
f = fdget(inner_map_ufd);
inner_map = __bpf_map_get(f);
if (IS_ERR(inner_map))
return inner_map;
/* Does not support >1 level map-in-map */
if (inner_map->inner_map_meta) {
fdput(f);
return ERR_PTR(-EINVAL);
}
if (!inner_map->ops->map_meta_equal) {
fdput(f);
return ERR_PTR(-ENOTSUPP);
}
if (map_value_has_spin_lock(inner_map)) {
fdput(f);
return ERR_PTR(-ENOTSUPP);
}
inner_map_meta_size = sizeof(*inner_map_meta);
/* In some cases verifier needs to access beyond just base map. */
if (inner_map->ops == &array_map_ops)
inner_map_meta_size = sizeof(struct bpf_array);
inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
if (!inner_map_meta) {
fdput(f);
return ERR_PTR(-ENOMEM);
}
inner_map_meta->map_type = inner_map->map_type;
inner_map_meta->key_size = inner_map->key_size;
inner_map_meta->value_size = inner_map->value_size;
inner_map_meta->map_flags = inner_map->map_flags;
inner_map_meta->max_entries = inner_map->max_entries;
inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
inner_map_meta->timer_off = inner_map->timer_off;
/* Misc members not needed in bpf_map_meta_equal() check. */
inner_map_meta->ops = inner_map->ops;
if (inner_map->ops == &array_map_ops) {
inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
container_of(inner_map_meta, struct bpf_array, map)->index_mask =
container_of(inner_map, struct bpf_array, map)->index_mask;
}
fdput(f);
return inner_map_meta;
}
void bpf_map_meta_free(struct bpf_map *map_meta)
{
kfree(map_meta);
}
bool bpf_map_meta_equal(const struct bpf_map *meta0,
const struct bpf_map *meta1)
{
/* No need to compare ops because it is covered by map_type */
return meta0->map_type == meta1->map_type &&
meta0->key_size == meta1->key_size &&
meta0->value_size == meta1->value_size &&
meta0->timer_off == meta1->timer_off &&
meta0->map_flags == meta1->map_flags;
}
void *bpf_map_fd_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int ufd)
{
struct bpf_map *inner_map, *inner_map_meta;
struct fd f;
f = fdget(ufd);
inner_map = __bpf_map_get(f);
if (IS_ERR(inner_map))
return inner_map;
inner_map_meta = map->inner_map_meta;
if (inner_map_meta->ops->map_meta_equal(inner_map_meta, inner_map))
bpf_map_inc(inner_map);
else
inner_map = ERR_PTR(-EINVAL);
fdput(f);
return inner_map;
}
void bpf_map_fd_put_ptr(void *ptr)
{
/* ptr->ops->map_free() has to go through one
* rcu grace period by itself.
*/
bpf_map_put(ptr);
}
u32 bpf_map_fd_sys_lookup_elem(void *ptr)
{
return ((struct bpf_map *)ptr)->id;
}