Merge branch 'Sleepable local storage'

KP Singh says:

====================

Local storage is currently unusable in sleepable helpers. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs to be used later in the life-cyle of the object.

Sometimes this context can only be gathered from sleepable programs
(because it needs accesing __user pointers or helpers like
bpf_ima_inode_hash). Allowing local storage to be used from sleepable
programs allows such context to be managed with the benefits of
local_storage.

# v2 -> v3

* Fixed some RCU issues pointed by Martin
* Added Martin's ack

# v1 -> v2

* Generalize RCU checks (will send a separate patch for updating
  non local storage code where this can be used).
* Add missing RCU lock checks from v1
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2021-12-29 17:54:41 -08:00
commit 1705c62e30
8 changed files with 73 additions and 49 deletions

View file

@ -17,6 +17,9 @@
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
#define bpf_rcu_lock_held() \
(rcu_read_lock_held() || rcu_read_lock_trace_held() || \
rcu_read_lock_bh_held())
struct bpf_local_storage_map_bucket {
struct hlist_head list;
raw_spinlock_t lock;
@ -162,4 +165,6 @@ struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
void *value, u64 map_flags);
void bpf_local_storage_free_rcu(struct rcu_head *rcu);
#endif /* _BPF_LOCAL_STORAGE_H */

View file

@ -17,6 +17,7 @@
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>
DEFINE_BPF_STORAGE_CACHE(inode_cache);
@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
if (!bsb)
return NULL;
inode_storage = rcu_dereference(bsb->storage);
inode_storage =
rcu_dereference_check(bsb->storage, bpf_rcu_lock_held());
if (!inode_storage)
return NULL;
@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
{
struct bpf_local_storage_data *sdata;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;
@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
BPF_CALL_2(bpf_inode_storage_delete,
struct bpf_map *, map, struct inode *, inode)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!inode)
return -EINVAL;

View file

@ -11,6 +11,9 @@
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
}
void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
kfree_rcu(local_storage, rcu);
}
static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
kfree_rcu(selem, rcu);
}
/* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
bool free_local_storage;
void *owner;
smap = rcu_dereference(SDATA(selem)->smap);
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
owner = local_storage->owner;
/* All uncharging on the owner must be done first.
@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
*
* Although the unlock will be done under
* rcu_read_lock(), it is more intutivie to
* read if kfree_rcu(local_storage, rcu) is done
* read if the freeing of the storage is done
* after the raw_spin_unlock_bh(&local_storage->lock).
*
* Hence, a "bool free_local_storage" is returned
* to the caller which then calls the kfree_rcu()
* after unlock.
* to the caller which then calls then frees the storage after
* all the RCU grace periods have expired.
*/
}
hlist_del_init_rcu(&selem->snode);
@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
kfree_rcu(selem, rcu);
call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
return free_local_storage;
}
@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
/* selem has already been unlinked from sk */
return;
local_storage = rcu_dereference(selem->local_storage);
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (free_local_storage)
kfree_rcu(local_storage, rcu);
call_rcu_tasks_trace(&local_storage->rcu,
bpf_local_storage_free_rcu);
}
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
/* selem has already be unlinked from smap */
return;
smap = rcu_dereference(SDATA(selem)->smap);
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
b = select_bucket(smap, selem);
raw_spin_lock_irqsave(&b->lock, flags);
if (likely(selem_linked_to_map(selem)))
@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem;
/* Fast path (cache hit) */
sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
bpf_rcu_lock_held());
if (sdata && rcu_access_pointer(sdata->smap) == smap)
return sdata;
/* Slow path (cache miss) */
hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
rcu_read_lock_trace_held())
if (rcu_access_pointer(SDATA(selem)->smap) == smap)
break;
@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner,
* bucket->list, first_selem can be freed immediately
* (instead of kfree_rcu) because
* bpf_local_storage_map_free() does a
* synchronize_rcu() before walking the bucket->list.
* synchronize_rcu_mult (waiting for both sleepable and
* normal programs) before walking the bucket->list.
* Hence, no one is accessing selem from the
* bucket->list under rcu_read_lock().
*/
@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
!map_value_has_spin_lock(&smap->map)))
return ERR_PTR(-EINVAL);
local_storage = rcu_dereference(*owner_storage(smap, owner));
local_storage = rcu_dereference_check(*owner_storage(smap, owner),
bpf_rcu_lock_held());
if (!local_storage || hlist_empty(&local_storage->list)) {
/* Very first elem for the owner */
err = check_flags(NULL, map_flags);

View file

@ -17,6 +17,7 @@
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>
DEFINE_BPF_STORAGE_CACHE(task_cache);
@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
struct bpf_local_storage *task_storage;
struct bpf_local_storage_map *smap;
task_storage = rcu_dereference(task->bpf_storage);
task_storage =
rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
if (!task_storage)
return NULL;
@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
{
struct bpf_local_storage_data *sdata;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;
@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
{
int ret;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;

View file

@ -11874,6 +11874,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
}
break;
case BPF_MAP_TYPE_RINGBUF:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_TASK_STORAGE:
break;
default:
verbose(env,

View file

@ -13,6 +13,7 @@
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
DEFINE_BPF_STORAGE_CACHE(sk_cache);
@ -22,7 +23,8 @@ bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
struct bpf_local_storage *sk_storage;
struct bpf_local_storage_map *smap;
sk_storage = rcu_dereference(sk->sk_bpf_storage);
sk_storage =
rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
if (!sk_storage)
return NULL;
@ -258,6 +260,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
{
struct bpf_local_storage_data *sdata;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
return (unsigned long)NULL;
@ -288,6 +291,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!sk || !sk_fullsock(sk))
return -EINVAL;
@ -416,6 +420,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
@ -425,6 +430,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
struct sock *, sk)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return -EPERM;

View file

@ -28,10 +28,6 @@ static unsigned int duration;
struct storage {
void *inode;
unsigned int value;
/* Lock ensures that spin locked versions of local stoage operations
* also work, most operations in this tests are still single threaded
*/
struct bpf_spin_lock lock;
};
/* Fork and exec the provided rm binary and return the exit code of the
@ -66,27 +62,24 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
static bool check_syscall_operations(int map_fd, int obj_fd)
{
struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
lookup_val = { .value = 0, .lock = { 0 } };
struct storage val = { .value = TEST_STORAGE_VALUE },
lookup_val = { .value = 0 };
int err;
/* Looking up an existing element should fail initially */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
BPF_F_LOCK);
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
"err:%d errno:%d\n", err, errno))
return false;
/* Create a new element */
err = bpf_map_update_elem(map_fd, &obj_fd, &val,
BPF_NOEXIST | BPF_F_LOCK);
err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
errno))
return false;
/* Lookup the newly created element */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
BPF_F_LOCK);
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
errno))
return false;
@ -102,8 +95,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
return false;
/* The lookup should fail, now that the element has been deleted */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
BPF_F_LOCK);
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
"err:%d errno:%d\n", err, errno))
return false;

View file

@ -20,7 +20,6 @@ int sk_storage_result = -1;
struct local_storage {
struct inode *exec_inode;
__u32 value;
struct bpf_spin_lock lock;
};
struct {
@ -58,9 +57,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
bpf_get_current_task_btf(), 0, 0);
if (storage) {
/* Don't let an executable delete itself */
bpf_spin_lock(&storage->lock);
is_self_unlink = storage->exec_inode == victim->d_inode;
bpf_spin_unlock(&storage->lock);
if (is_self_unlink)
return -EPERM;
}
@ -68,7 +65,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
return 0;
}
SEC("lsm/inode_rename")
SEC("lsm.s/inode_rename")
int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@ -89,10 +86,8 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
if (!storage)
return 0;
bpf_spin_lock(&storage->lock);
if (storage->value != DUMMY_STORAGE_VALUE)
inode_storage_result = -1;
bpf_spin_unlock(&storage->lock);
err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode);
if (!err)
@ -101,7 +96,7 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
return 0;
}
SEC("lsm/socket_bind")
SEC("lsm.s/socket_bind")
int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
int addrlen)
{
@ -117,10 +112,8 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (!storage)
return 0;
bpf_spin_lock(&storage->lock);
if (storage->value != DUMMY_STORAGE_VALUE)
sk_storage_result = -1;
bpf_spin_unlock(&storage->lock);
err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
if (!err)
@ -129,7 +122,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
return 0;
}
SEC("lsm/socket_post_create")
SEC("lsm.s/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int protocol, int kern)
{
@ -144,9 +137,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
if (!storage)
return 0;
bpf_spin_lock(&storage->lock);
storage->value = DUMMY_STORAGE_VALUE;
bpf_spin_unlock(&storage->lock);
return 0;
}
@ -154,7 +145,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
/* This uses the local storage to remember the inode of the binary that a
* process was originally executing.
*/
SEC("lsm/bprm_committed_creds")
SEC("lsm.s/bprm_committed_creds")
void BPF_PROG(exec, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
@ -166,18 +157,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm)
storage = bpf_task_storage_get(&task_storage_map,
bpf_get_current_task_btf(), 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (storage) {
bpf_spin_lock(&storage->lock);
if (storage)
storage->exec_inode = bprm->file->f_inode;
bpf_spin_unlock(&storage->lock);
}
storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode,
0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return;
bpf_spin_lock(&storage->lock);
storage->value = DUMMY_STORAGE_VALUE;
bpf_spin_unlock(&storage->lock);
}