selftsets/bpf: Retry map update for non-preallocated per-cpu map

BPF CI failed due to map_percpu_stats_percpu_hash from time to time [1].
It seems that the failure reason is per-cpu bpf memory allocator may not
be able to allocate per-cpu pointer successfully and it can not refill
free llist timely, and bpf_map_update_elem() will return -ENOMEM.

So mitigate the problem by retrying the update operation for
non-preallocated per-cpu map.

[1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20231101032455.3808547-4-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Hou Tao 2023-11-01 11:24:55 +08:00 committed by Alexei Starovoitov
parent b9b7955316
commit 2f553b032c

View file

@ -141,6 +141,7 @@ struct upsert_opts {
__u32 map_type;
int map_fd;
__u32 n;
bool retry_for_nomem;
};
static int create_small_hash(void)
@ -154,6 +155,11 @@ static int create_small_hash(void)
return map_fd;
}
static bool retry_for_nomem_fn(int err)
{
return err == ENOMEM;
}
static void *patch_map_thread(void *arg)
{
/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
@ -175,7 +181,12 @@ static void *patch_map_thread(void *arg)
val_ptr = &val;
}
ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
/* 2 seconds may be enough ? */
if (opts->retry_for_nomem)
ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
40, retry_for_nomem_fn);
else
ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
@ -296,6 +307,13 @@ static void __test(int map_fd)
else
opts.n /= 2;
/* per-cpu bpf memory allocator may not be able to allocate per-cpu
* pointer successfully and it can not refill free llist timely, and
* bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
* the problem temporarily.
*/
opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
/*
* Upsert keys [0, n) under some competition: with random values from
* N_THREADS threads. Check values, then delete all elements and check