Merge branch 'bpf-perf'
Kaixu Xia says:

====================
bpf: Introduce the ability of eBPF programs to access hardware PMU counters

This patchset is based on the net-next tree:
git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git, commit 9dc20a6496.

Previous patch v6 url: https://lkml.org/lkml/2015/8/4/188

changes in V7:
 - rebase the whole patch set onto the net-next tree (9dc20a64);
 - split out the core perf APIs into Patch 1/5;
 - change the return value of perf_event_attrs() from struct perf_event *
   to const struct perf_event * in Patch 1/5;
 - rename perf_event_read_internal() to perf_event_read_local() and
   rewrite it in Patch 1/5;
 - rename check_func_limit() to check_map_func_compatibility() and remove
   the unnecessary pointer-to-a-pointer argument in Patch 4/5;

changes in V6:
 - make the Patch 1/4 commit message more meaningful and readable;
 - remove the unnecessary comment in Patch 2/4 and make it clean;
 - declare perf_event_release_kernel() in include/linux/perf_event.h to fix
   the build error when CONFIG_PERF_EVENTS isn't configured in Patch 2/4;
 - add perf_event_attrs() to get the struct perf_event_attr in Patch 2/4;
 - move the related code from kernel/trace/bpf_trace.c to kernel/events/core.c
   and add perf_event_read_internal() to avoid poking inside of the event
   outside of perf code in Patch 3/4;
 - generalize the func & map match-pair with an array in Patch 3/4;

changes in V5:
 - move struct fd_array_map_ops *fd_ops to bpf_map;
 - move the perf event refcnt decrement for array elements to map_free;
 - fix the NULL ptr of perf_event_get();
 - move bpf_perf_event_read() to kernel/trace/bpf_trace.c;
 - get rid of the remaining struct bpf_prog;
 - remove the unnecessary cast on void *;

changes in V4:
 - make the bpf_prog_array_map more generic;
 - fix the bug of event refcnt leak;
 - use more useful errno in bpf_perf_event_read();

changes in V3:
 - collapse V2 patches 1-3 into one;
 - drop the function map->ops->map_traverse_elem() and release the
   struct perf_event in map_free;
 - only allow access to bpf_perf_event_read() from programs;
 - update the perf_event_array map elem via xchg();
 - pass the index directly to bpf_perf_event_read() instead of MAP_KEY;

changes in V2:
 - put atomic_long_inc_not_zero() between fdget() and fdput();
 - limit the event type to PERF_TYPE_RAW and PERF_TYPE_HARDWARE;
 - only read the event counter on the current CPU or for the current process;
 - add the new map type BPF_MAP_TYPE_PERF_EVENT_ARRAY to store the pointer
   to the struct perf_event;
 - given the perf_event_map_fd and key, bpf_perf_event_read() can get the
   hardware PMU counter value;

Patch 5/5 is a simple example and shows how to use this new eBPF ability. The
PMU counter data can be found in /sys/kernel/debug/tracing/trace (trace_pipe);
the output below shows the cycles PMU value while sampling 'kprobe/sys_write':

 $ cat /sys/kernel/debug/tracing/trace_pipe
 $ ./tracex6
 ...
      syslog-ng-548   [000] d..1    76.905673: : CPU-0   681765271
      syslog-ng-548   [000] d..1    76.905690: : CPU-0   681787855
      syslog-ng-548   [000] d..1    76.905707: : CPU-0   681810504
      syslog-ng-548   [000] d..1    76.905725: : CPU-0   681834771
      syslog-ng-548   [000] d..1    76.905745: : CPU-0   681859519
      syslog-ng-548   [000] d..1    76.905766: : CPU-0   681890419
      syslog-ng-548   [000] d..1    76.905783: : CPU-0   681914045
      syslog-ng-548   [000] d..1    76.905800: : CPU-0   681935950
      syslog-ng-548   [000] d..1    76.905816: : CPU-0   681958299
             ls-690   [005] d..1    82.241308: : CPU-5   3138451
             sh-691   [004] d..1    82.244570: : CPU-4   7324988
          <...>-699   [007] d..1    99.961387: : CPU-7   3194027
          <...>-695   [003] d..1    99.961474: : CPU-3   288901
          <...>-695   [003] d..1    99.961541: : CPU-3   383145
          <...>-695   [003] d..1    99.961591: : CPU-3   450365
          <...>-695   [003] d..1    99.961639: : CPU-3   515751
          <...>-695   [003] d..1    99.961686: : CPU-3   579047
 ...

The patches in detail:

 Patch 1/5 adds the necessary core perf APIs perf_event_attrs(),
  perf_event_get() and perf_event_read_local(), used when accessing event
  counters in eBPF programs;
 Patch 2/5 rewrites part of the bpf_prog_array map code to make it more
  generic;
 Patch 3/5 introduces a new bpf map type that only stores the pointer to a
  struct perf_event;
 Patch 4/5 implements the function bpf_perf_event_read() that reads the
  selected hardware PMU counter;
 Patch 5/5 gives a simple example.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
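For quick reference, the program-side usage this series enables looks like the following condensed sketch, based on the tracex6 sample added in Patch 5/5 (the map name, section name and sizes are taken from that sample; user-space setup and error handling are omitted):

    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"

    /* one slot per CPU; user space stores a perf event fd in each slot */
    struct bpf_map_def SEC("maps") my_map = {
    	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    	.key_size = sizeof(int),
    	.value_size = sizeof(u32),
    	.max_entries = 32,
    };

    SEC("kprobe/sys_write")
    int bpf_prog1(struct pt_regs *ctx)
    {
    	u64 count;
    	u32 key = bpf_get_smp_processor_id();
    	char fmt[] = "CPU-%d %llu\n";

    	/* read the hardware counter stored in this CPU's map slot */
    	count = bpf_perf_event_read(&my_map, key);
    	bpf_trace_printk(fmt, sizeof(fmt), key, count);

    	return 0;
    }

User space opens one PERF_TYPE_HARDWARE event per CPU with perf_event_open(), stores each fd in the map (bpf_update_elem() in the sample) and enables the events with PERF_EVENT_IOC_ENABLE; the printed counter values then appear in trace_pipe as in the log above.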
commit d74a790d52
14 changed files with 373 additions and 53 deletions
arch/x86/net/bpf_jit_comp.c

@@ -246,7 +246,7 @@ static void emit_prologue(u8 **pprog)
  *   goto out;
  * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
  *   goto out;
- * prog = array->prog[index];
+ * prog = array->ptrs[index];
  * if (prog == NULL)
  *   goto out;
  * goto *(prog->bpf_func + prologue_size);
@@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog)
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
 
-	/* prog = array->prog[index]; */
+	/* prog = array->ptrs[index]; */
 	EMIT4_off32(0x48, 0x8D, 0x84, 0xD6,       /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
-		    offsetof(struct bpf_array, prog));
+		    offsetof(struct bpf_array, ptrs));
 	EMIT3(0x48, 0x8B, 0x00);                  /* mov rax, qword ptr [rax] */
 
 	/* if (prog == NULL)
include/linux/bpf.h

@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/file.h>
+#include <linux/perf_event.h>
 
 struct bpf_map;
 
@@ -24,6 +25,10 @@ struct bpf_map_ops {
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
 	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
 	int (*map_delete_elem)(struct bpf_map *map, void *key);
+
+	/* funcs called by prog_array and perf_event_array map */
+	void *(*map_fd_get_ptr) (struct bpf_map *map, int fd);
+	void (*map_fd_put_ptr) (void *ptr);
 };
 
 struct bpf_map {
@@ -142,13 +147,13 @@ struct bpf_array {
 	bool owner_jited;
 	union {
 		char value[0] __aligned(8);
-		struct bpf_prog *prog[0] __aligned(8);
+		void *ptrs[0] __aligned(8);
 	};
 };
 #define MAX_TAIL_CALL_CNT 32
 
 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
-void bpf_prog_array_map_clear(struct bpf_map *map);
+void bpf_fd_array_map_clear(struct bpf_map *map);
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
@@ -185,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
+extern const struct bpf_func_proto bpf_perf_event_read_proto;
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
include/linux/perf_event.h

@@ -641,6 +641,8 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
+extern struct perf_event *perf_event_get(unsigned int fd);
+extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
@@ -659,6 +661,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
+extern u64 perf_event_read_local(struct perf_event *event);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
@@ -979,6 +982,12 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child) { }
 static inline void perf_event_free_task(struct task_struct *task) { }
 static inline void perf_event_delayed_put(struct task_struct *task) { }
+static inline struct perf_event *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
+static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline u64 perf_event_read_local(struct perf_event *event) { return -EINVAL; }
 static inline void perf_event_print_debug(void) { }
 static inline int perf_event_task_disable(void) { return -EINVAL; }
 static inline int perf_event_task_enable(void) { return -EINVAL; }
@@ -1011,6 +1020,7 @@ static inline void perf_event_enable(struct perf_event *event) { }
 static inline void perf_event_disable(struct perf_event *event) { }
 static inline int __perf_event_disable(void *info) { return -1; }
 static inline void perf_event_task_tick(void) { }
+static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
include/uapi/linux/bpf.h

@@ -114,6 +114,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_HASH,
 	BPF_MAP_TYPE_ARRAY,
 	BPF_MAP_TYPE_PROG_ARRAY,
+	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -270,6 +271,7 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_skb_get_tunnel_key,
 	BPF_FUNC_skb_set_tunnel_key,
+	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
 	__BPF_FUNC_MAX_ID,
 };
 
kernel/bpf/arraymap.c

@@ -150,15 +150,15 @@ static int __init register_array_map(void)
 }
 late_initcall(register_array_map);
 
-static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
+static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
 {
-	/* only bpf_prog file descriptors can be stored in prog_array map */
+	/* only file descriptors can be stored in this type of map */
 	if (attr->value_size != sizeof(u32))
 		return ERR_PTR(-EINVAL);
 	return array_map_alloc(attr);
 }
 
-static void prog_array_map_free(struct bpf_map *map)
+static void fd_array_map_free(struct bpf_map *map)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
@@ -167,21 +167,21 @@ static void prog_array_map_free(struct bpf_map *map)
 
 	/* make sure it's empty */
 	for (i = 0; i < array->map.max_entries; i++)
-		BUG_ON(array->prog[i] != NULL);
+		BUG_ON(array->ptrs[i] != NULL);
 	kvfree(array);
 }
 
-static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key)
+static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	return NULL;
 }
 
 /* only called from syscall */
-static int prog_array_map_update_elem(struct bpf_map *map, void *key,
-				      void *value, u64 map_flags)
+static int fd_array_map_update_elem(struct bpf_map *map, void *key,
+				    void *value, u64 map_flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct bpf_prog *prog, *old_prog;
+	void *new_ptr, *old_ptr;
 	u32 index = *(u32 *)key, ufd;
 
 	if (map_flags != BPF_ANY)
@@ -191,57 +191,75 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key,
 		return -E2BIG;
 
 	ufd = *(u32 *)value;
-	prog = bpf_prog_get(ufd);
-	if (IS_ERR(prog))
-		return PTR_ERR(prog);
+	new_ptr = map->ops->map_fd_get_ptr(map, ufd);
+	if (IS_ERR(new_ptr))
+		return PTR_ERR(new_ptr);
 
-	if (!bpf_prog_array_compatible(array, prog)) {
-		bpf_prog_put(prog);
-		return -EINVAL;
-	}
-
-	old_prog = xchg(array->prog + index, prog);
-	if (old_prog)
-		bpf_prog_put_rcu(old_prog);
+	old_ptr = xchg(array->ptrs + index, new_ptr);
+	if (old_ptr)
+		map->ops->map_fd_put_ptr(old_ptr);
 
 	return 0;
 }
 
-static int prog_array_map_delete_elem(struct bpf_map *map, void *key)
+static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct bpf_prog *old_prog;
+	void *old_ptr;
 	u32 index = *(u32 *)key;
 
 	if (index >= array->map.max_entries)
 		return -E2BIG;
 
-	old_prog = xchg(array->prog + index, NULL);
-	if (old_prog) {
-		bpf_prog_put_rcu(old_prog);
+	old_ptr = xchg(array->ptrs + index, NULL);
+	if (old_ptr) {
+		map->ops->map_fd_put_ptr(old_ptr);
 		return 0;
 	} else {
 		return -ENOENT;
 	}
 }
 
+static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct bpf_prog *prog = bpf_prog_get(fd);
+	if (IS_ERR(prog))
+		return prog;
+
+	if (!bpf_prog_array_compatible(array, prog)) {
+		bpf_prog_put(prog);
+		return ERR_PTR(-EINVAL);
+	}
+	return prog;
+}
+
+static void prog_fd_array_put_ptr(void *ptr)
+{
+	struct bpf_prog *prog = ptr;
+
+	bpf_prog_put_rcu(prog);
+}
+
 /* decrement refcnt of all bpf_progs that are stored in this map */
-void bpf_prog_array_map_clear(struct bpf_map *map)
+void bpf_fd_array_map_clear(struct bpf_map *map)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
 	for (i = 0; i < array->map.max_entries; i++)
-		prog_array_map_delete_elem(map, &i);
+		fd_array_map_delete_elem(map, &i);
 }
 
 static const struct bpf_map_ops prog_array_ops = {
-	.map_alloc = prog_array_map_alloc,
-	.map_free = prog_array_map_free,
+	.map_alloc = fd_array_map_alloc,
+	.map_free = fd_array_map_free,
 	.map_get_next_key = array_map_get_next_key,
-	.map_lookup_elem = prog_array_map_lookup_elem,
-	.map_update_elem = prog_array_map_update_elem,
-	.map_delete_elem = prog_array_map_delete_elem,
+	.map_lookup_elem = fd_array_map_lookup_elem,
+	.map_update_elem = fd_array_map_update_elem,
+	.map_delete_elem = fd_array_map_delete_elem,
+	.map_fd_get_ptr = prog_fd_array_get_ptr,
+	.map_fd_put_ptr = prog_fd_array_put_ptr,
 };
 
 static struct bpf_map_type_list prog_array_type __read_mostly = {
@@ -255,3 +273,60 @@ static int __init register_prog_array_map(void)
 	return 0;
 }
 late_initcall(register_prog_array_map);
+
+static void perf_event_array_map_free(struct bpf_map *map)
+{
+	bpf_fd_array_map_clear(map);
+	fd_array_map_free(map);
+}
+
+static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
+{
+	struct perf_event *event;
+	const struct perf_event_attr *attr;
+
+	event = perf_event_get(fd);
+	if (IS_ERR(event))
+		return event;
+
+	attr = perf_event_attrs(event);
+	if (IS_ERR(attr))
+		return (void *)attr;
+
+	if (attr->type != PERF_TYPE_RAW &&
+	    attr->type != PERF_TYPE_HARDWARE) {
+		perf_event_release_kernel(event);
+		return ERR_PTR(-EINVAL);
+	}
+	return event;
+}
+
+static void perf_event_fd_array_put_ptr(void *ptr)
+{
+	struct perf_event *event = ptr;
+
+	perf_event_release_kernel(event);
+}
+
+static const struct bpf_map_ops perf_event_array_ops = {
+	.map_alloc = fd_array_map_alloc,
+	.map_free = perf_event_array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = fd_array_map_lookup_elem,
+	.map_update_elem = fd_array_map_update_elem,
+	.map_delete_elem = fd_array_map_delete_elem,
+	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
+	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
+};
+
+static struct bpf_map_type_list perf_event_array_type __read_mostly = {
+	.ops = &perf_event_array_ops,
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+};
+
+static int __init register_perf_event_array_map(void)
+{
+	bpf_register_map_type(&perf_event_array_type);
+	return 0;
+}
+late_initcall(register_perf_event_array_map);
kernel/bpf/core.c

@@ -450,7 +450,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 
 		tail_call_cnt++;
 
-		prog = READ_ONCE(array->prog[index]);
+		prog = READ_ONCE(array->ptrs[index]);
 		if (unlikely(!prog))
 			goto out;
 
kernel/bpf/syscall.c

@@ -72,7 +72,7 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 		/* prog_array stores refcnt-ed bpf_prog pointers
 		 * release them all when user space closes prog_array_fd
 		 */
-		bpf_prog_array_map_clear(map);
+		bpf_fd_array_map_clear(map);
 
 	bpf_map_put(map);
 	return 0;
kernel/bpf/verifier.c

@@ -238,6 +238,14 @@ static const char * const reg_type_str[] = {
 	[CONST_IMM]		= "imm",
 };
 
+static const struct {
+	int map_type;
+	int func_id;
+} func_limit[] = {
+	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
+	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+};
+
 static void print_verifier_state(struct verifier_env *env)
 {
 	enum bpf_reg_type t;
@@ -837,6 +845,28 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 	return err;
 }
 
+static int check_map_func_compatibility(struct bpf_map *map, int func_id)
+{
+	bool bool_map, bool_func;
+	int i;
+
+	if (!map)
+		return 0;
+
+	for (i = 0; i <= ARRAY_SIZE(func_limit); i++) {
+		bool_map = (map->map_type == func_limit[i].map_type);
+		bool_func = (func_id == func_limit[i].func_id);
+		/* only when map & func pair match it can continue.
+		 * don't allow any other map type to be passed into
+		 * the special func;
+		 */
+		if (bool_map != bool_func)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int check_call(struct verifier_env *env, int func_id)
 {
 	struct verifier_state *state = &env->cur_state;
@@ -912,21 +942,9 @@ static int check_call(struct verifier_env *env, int func_id)
 		return -EINVAL;
 	}
 
-	if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
-	    func_id != BPF_FUNC_tail_call)
-		/* prog_array map type needs extra care:
-		 * only allow to pass it into bpf_tail_call() for now.
-		 * bpf_map_delete_elem() can be allowed in the future,
-		 * while bpf_map_update_elem() must only be done via syscall
-		 */
-		return -EINVAL;
-
-	if (func_id == BPF_FUNC_tail_call &&
-	    map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
-		/* don't allow any other map type to be passed into
-		 * bpf_tail_call()
-		 */
-		return -EINVAL;
+	err = check_map_func_compatibility(map, func_id);
+	if (err)
+		return err;
 
 	return 0;
 }
kernel/events/core.c

@@ -3212,6 +3212,59 @@ static inline u64 perf_event_count(struct perf_event *event)
 	return __perf_event_count(event);
 }
 
+/*
+ * NMI-safe method to read a local event, that is an event that
+ * is:
+ *   - either for the current task, or for this CPU
+ *   - does not have inherit set, for inherited task events
+ *     will not be local and we cannot read them atomically
+ *   - must not have a pmu::count method
+ */
+u64 perf_event_read_local(struct perf_event *event)
+{
+	unsigned long flags;
+	u64 val;
+
+	/*
+	 * Disabling interrupts avoids all counter scheduling (context
+	 * switches, timer based rotation and IPIs).
+	 */
+	local_irq_save(flags);
+
+	/* If this is a per-task event, it must be for current */
+	WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
+		     event->hw.target != current);
+
+	/* If this is a per-CPU event, it must be for this CPU */
+	WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
+		     event->cpu != smp_processor_id());
+
+	/*
+	 * It must not be an event with inherit set, we cannot read
+	 * all child counters from atomic context.
+	 */
+	WARN_ON_ONCE(event->attr.inherit);
+
+	/*
+	 * It must not have a pmu::count method, those are not
+	 * NMI safe.
+	 */
+	WARN_ON_ONCE(event->pmu->count);
+
+	/*
+	 * If the event is currently on this CPU, its either a per-task event,
+	 * or local to this CPU. Furthermore it means its ACTIVE (otherwise
+	 * oncpu == -1).
+	 */
+	if (event->oncpu == smp_processor_id())
+		event->pmu->read(event);
+
+	val = local64_read(&event->count);
+	local_irq_restore(flags);
+
+	return val;
+}
+
 static u64 perf_event_read(struct perf_event *event)
 {
 	/*
@@ -8574,6 +8627,31 @@ void perf_event_delayed_put(struct task_struct *task)
 		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
+struct perf_event *perf_event_get(unsigned int fd)
+{
+	int err;
+	struct fd f;
+	struct perf_event *event;
+
+	err = perf_fget_light(fd, &f);
+	if (err)
+		return ERR_PTR(err);
+
+	event = f.file->private_data;
+	atomic_long_inc(&event->refcount);
+	fdput(f);
+
+	return event;
+}
+
+const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
+{
+	if (!event)
+		return ERR_PTR(-EINVAL);
+
+	return &event->attr;
+}
+
 /*
  * inherit a event from parent task to child task:
  */
kernel/trace/bpf_trace.c

@@ -158,6 +158,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
+static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct perf_event *event;
+
+	if (unlikely(index >= array->map.max_entries))
+		return -E2BIG;
+
+	event = (struct perf_event *)array->ptrs[index];
+	if (!event)
+		return -ENOENT;
+
+	/*
+	 * we don't know if the function is run successfully by the
+	 * return value. It can be judged in other places, such as
+	 * eBPF programs.
+	 */
+	return perf_event_read_local(event);
+}
+
+const struct bpf_func_proto bpf_perf_event_read_proto = {
+	.func		= bpf_perf_event_read,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -183,6 +212,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 		return bpf_get_trace_printk_proto();
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_perf_event_read:
+		return &bpf_perf_event_read_proto;
 	default:
 		return NULL;
 	}
samples/bpf/Makefile

@@ -12,6 +12,7 @@ hostprogs-y += tracex2
 hostprogs-y += tracex3
 hostprogs-y += tracex4
 hostprogs-y += tracex5
+hostprogs-y += tracex6
 hostprogs-y += lathist
 
 test_verifier-objs := test_verifier.o libbpf.o
@@ -25,6 +26,7 @@ tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
 tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
 tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
 tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
+tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
 lathist-objs := bpf_load.o libbpf.o lathist_user.o
 
 # Tell kbuild to always build the programs
@@ -37,6 +39,7 @@ always += tracex2_kern.o
 always += tracex3_kern.o
 always += tracex4_kern.o
 always += tracex5_kern.o
+always += tracex6_kern.o
 always += tcbpf1_kern.o
 always += lathist_kern.o
 
@@ -51,6 +54,7 @@ HOSTLOADLIBES_tracex2 += -lelf
 HOSTLOADLIBES_tracex3 += -lelf
 HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
+HOSTLOADLIBES_tracex6 += -lelf
 HOSTLOADLIBES_lathist += -lelf
 
 # point this to your LLVM backend with bpf support
samples/bpf/bpf_helpers.h

@@ -31,6 +31,8 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) =
 	(void *) BPF_FUNC_get_current_uid_gid;
 static int (*bpf_get_current_comm)(void *buf, int buf_size) =
 	(void *) BPF_FUNC_get_current_comm;
+static int (*bpf_perf_event_read)(void *map, int index) =
+	(void *) BPF_FUNC_perf_event_read;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
samples/bpf/tracex6_kern.c (new file, 26 lines)

@@ -0,0 +1,26 @@
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = 32,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	u64 count;
+	u32 key = bpf_get_smp_processor_id();
+	char fmt[] = "CPU-%d %llu\n";
+
+	count = bpf_perf_event_read(&my_map, key);
+	bpf_trace_printk(fmt, sizeof(fmt), key, count);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
samples/bpf/tracex6_user.c (new file, 68 lines)

@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define SAMPLE_PERIOD  0x7fffffffffffffffULL
+
+static void test_bpf_perf_event(void)
+{
+	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	int *pmu_fd = malloc(nr_cpus * sizeof(int));
+	unsigned long value;
+	int i;
+
+	struct perf_event_attr attr_insn_pmu = {
+		.freq = 0,
+		.sample_period = SAMPLE_PERIOD,
+		.inherit = 0,
+		.type = PERF_TYPE_HARDWARE,
+		.read_format = 0,
+		.sample_type = 0,
+		.config = 0,/* PMU: cycles */
+	};
+
+	for (i = 0; i < nr_cpus; i++) {
+		pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
+		if (pmu_fd[i] < 0)
+			printf("event syscall failed\n");
+
+		bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
+		ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+	}
+
+	system("ls");
+	system("pwd");
+	system("sleep 2");
+
+	for (i = 0; i < nr_cpus; i++)
+		close(pmu_fd[i]);
+
+	close(map_fd);
+
+	free(pmu_fd);
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	test_bpf_perf_event();
+
+	return 0;
+}