linux/arch/arm64/kernel/perf_regs.c
James Clark cbb0c02caf perf: arm64: Add SVE vector granule register to user regs
Dwarf based unwinding in a function that pushes SVE registers onto
the stack requires the unwinder to know the length of the SVE register
to calculate the stack offsets correctly. This was added to the Arm
specific Dwarf spec as the VG pseudo register[1].

Add the vector length at position 46 if it's requested by userspace and
SVE is supported. If it's not supported then fail to open the event.

The vector length must be on each sample because it can be changed
at runtime via a prctl or ptrace call. Also by adding it as a register
rather than a separate attribute, minimal changes will be required in an
unwinder that already indexes into the register list.

[1]: https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst

Reviewed-by: Mark Brown <broonie@kernel.org>
Signed-off-by: James Clark <james.clark@arm.com>
Link: https://lore.kernel.org/r/20220901132658.1024635-2-james.clark@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2022-09-22 15:06:02 +01:00

107 lines
2.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/bug.h>
#include <linux/sched/task_stack.h>
#include <asm/perf_regs.h>
#include <asm/ptrace.h>
/*
 * Produce the value of an "extended" perf register, i.e. one whose index
 * is handed over by perf_reg_value() because it lies at or beyond
 * PERF_REG_ARM64_MAX.
 *
 * Only PERF_REG_ARM64_VG is handled. Any other index, or a VG request on a
 * system without SVE, trips a one-time warning and yields 0.
 */
static u64 perf_ext_regs_value(int idx)
{
	if (idx != PERF_REG_ARM64_VG) {
		/* Unknown extended register index. */
		WARN_ON_ONCE(true);
		return 0;
	}

	if (WARN_ON_ONCE(!system_supports_sve()))
		return 0;

	/*
	 * The vector granule is the current SVE register length expressed
	 * in 64-bit units: the length in bits, divided by 64.
	 */
	return (task_get_sve_vl(current) * 8) / 64;
}
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_EXTENDED_MAX))
return 0;
/*
* Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but
* we're stuck with it for ABI compatibility reasons.
*
* For a 32-bit consumer inspecting a 32-bit task, then it will look at
* the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h).
* These correspond directly to a prefix of the registers saved in our
* 'struct pt_regs', with the exception of the PC, so we copy that down
* (x15 corresponds to SP_hyp in the architecture).
*
* So far, so good.
*
* The oddity arises when a 64-bit consumer looks at a 32-bit task and
* asks for registers beyond PERF_REG_ARM_MAX. In this case, we return
* SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and
* PC registers would normally live. The initial idea was to allow a
* 64-bit unwinder to unwind a 32-bit task and, although it's not clear
* how well that works in practice, somebody might be relying on it.
*
* At the time we make a sample, we don't know whether the consumer is
* 32-bit or 64-bit, so we have to cater for both possibilities.
*/
if (compat_user_mode(regs)) {
if ((u32)idx == PERF_REG_ARM64_SP)
return regs->compat_sp;
if ((u32)idx == PERF_REG_ARM64_LR)
return regs->compat_lr;
if (idx == 15)
return regs->pc;
}
if ((u32)idx == PERF_REG_ARM64_SP)
return regs->sp;
if ((u32)idx == PERF_REG_ARM64_PC)
return regs->pc;
if ((u32)idx >= PERF_REG_ARM64_MAX)
return perf_ext_regs_value(idx);
return regs->regs[idx];
}
/* Every mask bit at or above PERF_REG_ARM64_MAX. */
#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1))

/*
 * Check a user-requested register mask.
 *
 * Returns 0 when @mask is acceptable, or -EINVAL when it is empty or
 * selects a reserved bit. The PERF_REG_ARM64_VG bit is only accepted when
 * the system supports SVE.
 */
int perf_reg_validate(u64 mask)
{
	u64 allowed = ~REG_RESERVED;

	if (system_supports_sve())
		allowed |= 1ULL << PERF_REG_ARM64_VG;

	if (mask == 0)
		return -EINVAL;

	return (mask & ~allowed) ? -EINVAL : 0;
}
/*
 * Report the perf sample register ABI for @task:
 * PERF_SAMPLE_REGS_ABI_32 for a compat thread, PERF_SAMPLE_REGS_ABI_64
 * otherwise.
 */
u64 perf_reg_abi(struct task_struct *task)
{
	return is_compat_thread(task_thread_info(task)) ?
		PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64;
}
/*
 * Fill @regs_user with the register set and register ABI of the current
 * task. The @regs argument is not consulted; both fields are derived from
 * 'current'.
 */
void perf_get_regs_user(struct perf_regs *regs_user,
			struct pt_regs *regs)
{
	/* ABI (32-bit compat vs 64-bit) of the current task. */
	regs_user->abi = perf_reg_abi(current);

	/* Register frame obtained from the current task via task_pt_regs(). */
	regs_user->regs = task_pt_regs(current);
}