powerpc fixes for 5.17 #2

- A series of bpf fixes, including an oops fix and some codegen fixes.
 
  - Fix a regression in syscall_get_arch() for compat processes.
 
  - Fix boot failure on some 32-bit systems with KASAN enabled.
 
  - A couple of other build/minor fixes.
 
 Thanks to: Athira Rajeev, Christophe Leroy, Dmitry V. Levin, Jiri Olsa, Johan Almbladh,
 Maxime Bizon, Naveen N. Rao, Nicholas Piggin.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmHtOJgTHG1wZUBlbGxl
 cm1hbi5pZC5hdQAKCRBR6+o8yOGlgLMeD/9eBHqMy+yhXfTw9mjAf8rVlsdY6Xjq
 2LYgHCJpeP0VjxBm8GcyFtAfJc778LyykU18xsi0WVsuwPmVdVwDfZpzxWJrYqeq
 6AuK9uRZAbEx6gZVCsWlk3Em7GyUEqHm5PTxKsDfbzh+hBTNQ8CcKKbCVmEZ+loA
 nWdaWJOZMpE4hQOx0Ph5JRAos/ZUEt2I57qZkRgvIEV+hroFrmdP1rnncdalnLHN
 pXsEn380Q+rpH+48HIpaTpl5+R8CvgFnnlI6/luZC6HoA8K6jph8FdOkkI9eU1v6
 TgdNMiNVIe1wRFzVp+nH7SGqL3l9X/UPS57hL52X+ccpHoioBW9CLPha06g/+UKT
 kJU3mYZg3jvQGrM9H/2R3P97UH9SwHE5SH0xcXj3ItH1NpOKpMO0h5UuWx+Ze8Wo
 zO80xbYLbALtEfx8JMCf95rnUVFL0n55xVJN+7+fs2hU3Be/ZpIyHfDujlHlbd1m
 o3KRhkg1a0RyGsun6Gd2vwLGaBbwYNWvgHM9IDeiAFdJ7HFpp1YrVGpQRkAIUbJS
 bzYiRczjbSof7c9WqLyKNatgNFO/JPI5vi1iNQ3u9Oh9fzjpv5Zok4vK534mOnqU
 QnhiqLJbY+yBpoAvSePGQTCCzDAnJq0r53n0d1O/2T1GcpX9MKM4YlL+Wu4m0yLn
 RcbOCTkmoqWZEw==
 =KjWT
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - A series of bpf fixes, including an oops fix and some codegen fixes.

 - Fix a regression in syscall_get_arch() for compat processes.

 - Fix boot failure on some 32-bit systems with KASAN enabled.

 - A couple of other build/minor fixes.

Thanks to Athira Rajeev, Christophe Leroy, Dmitry V. Levin, Jiri Olsa,
Johan Almbladh, Maxime Bizon, Naveen N. Rao, and Nicholas Piggin.

* tag 'powerpc-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64s: Mask SRR0 before checking against the masked NIP
  powerpc/perf: Only define power_pmu_wants_prompt_pmi() for CONFIG_PPC64
  powerpc/32s: Fix kasan_init_region() for KASAN
  powerpc/time: Fix build failure due to do_hard_irq_enable() on PPC32
  powerpc/audit: Fix syscall_get_arch()
  powerpc64/bpf: Limit 'ldbrx' to processors compliant with ISA v2.06
  tools/bpf: Rename 'struct event' to avoid naming conflict
  powerpc/bpf: Update ldimm64 instructions during extra pass
  powerpc32/bpf: Fix codegen for bpf-to-bpf calls
  bpf: Guard against accessing NULL pt_regs in bpf_get_task_stack()
This commit is contained in:
Linus Torvalds 2022-01-23 17:52:42 +02:00
commit dd81e1c7d5
16 changed files with 129 additions and 85 deletions

View file

@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val)
update_user_segment(15, val);
}
int __init find_free_bat(void);
unsigned int bat_block_size(unsigned long base, unsigned long top);
#endif /* !__ASSEMBLY__ */
/* We happily ignore the smaller BATs on 601, we don't actually use

View file

@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
static inline bool should_hard_irq_enable(void)
static __always_inline bool should_hard_irq_enable(void)
{
return false;
}

View file

@ -500,6 +500,7 @@
#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))

View file

@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long val, mask = -1UL;
unsigned int n = 6;
if (is_32bit_task())
if (is_tsk_32bit_task(task))
mask = 0xffffffff;
while (n--) {
@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline int syscall_get_arch(struct task_struct *task)
{
if (is_32bit_task())
if (is_tsk_32bit_task(task))
return AUDIT_ARCH_PPC;
else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
return AUDIT_ARCH_PPC64LE;

View file

@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags)
#ifdef CONFIG_COMPAT
#define is_32bit_task() (test_thread_flag(TIF_32BIT))
#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
#else
#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
#endif
#if defined(CONFIG_PPC64)

View file

@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE:
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)

View file

@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
static int __init find_free_bat(void)
int __init find_free_bat(void)
{
int b;
int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@ -100,7 +100,7 @@ static int __init find_free_bat(void)
* - block size has to be a power of two. This is calculated by finding the
* highest bit set to 1.
*/
static unsigned int block_size(unsigned long base, unsigned long top)
unsigned int bat_block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
int idx;
while ((idx = find_free_bat()) != -1 && base != top) {
unsigned int size = block_size(base, top);
unsigned int size = bat_block_size(base, top);
if (size < 128 << 10)
break;
@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void)
unsigned long size;
for (i = 0; i < nb - 1 && base < top;) {
size = block_size(base, top);
size = bat_block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
base += size;
}
if (base < top) {
size = block_size(base, top);
size = bat_block_size(base, top);
if ((top - base) > size) {
size <<= 1;
if (strict_kernel_rwx_enabled() && base + size > border)

View file

@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
unsigned long k_cur = k_start;
int k_size = k_end - k_start;
int k_size_base = 1 << (ffs(k_size) - 1);
unsigned long k_nobat = k_start;
unsigned long k_cur;
phys_addr_t phys;
int ret;
void *block;
block = memblock_alloc(k_size, k_size_base);
while (k_nobat < k_end) {
unsigned int k_size = bat_block_size(k_nobat, k_end);
int idx = find_free_bat();
if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
int shift = ffs(k_size - k_size_base);
int k_size_more = shift ? 1 << (shift - 1) : 0;
if (idx == -1)
break;
if (k_size < SZ_128K)
break;
phys = memblock_phys_alloc_range(k_size, k_size, 0,
MEMBLOCK_ALLOC_ANYWHERE);
if (!phys)
break;
setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
if (k_size_more >= SZ_128K)
setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
k_size_more, PAGE_KERNEL);
if (v_block_mapped(k_start))
k_cur = k_start + k_size_base;
if (v_block_mapped(k_start + k_size_base))
k_cur = k_start + k_size_base + k_size_more;
update_bats();
setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
k_nobat += k_size;
}
if (k_nobat != k_start)
update_bats();
if (!block)
block = memblock_alloc(k_size, PAGE_SIZE);
if (!block)
return -ENOMEM;
if (k_nobat < k_end) {
phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
MEMBLOCK_ALLOC_ANYWHERE);
if (!phys)
return -ENOMEM;
}
ret = kasan_init_shadow_page_tables(k_start, k_end);
if (ret)
return ret;
kasan_update_early_region(k_start, k_cur, __pte(0));
kasan_update_early_region(k_start, k_nobat, __pte(0));
for (; k_cur < k_end; k_cur += PAGE_SIZE) {
for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_off_k(k_cur);
void *va = block + k_cur - k_start;
pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
}
flush_tlb_kernel_range(k_start, k_end);
memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
return 0;
}

View file

@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx, u32 *addrs)
/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx, u32 *addrs)
{
const struct bpf_insn *insn = fp->insnsi;
bool func_addr_fixed;
u64 func_addr;
u32 tmp_idx;
int i, ret;
int i, j, ret;
for (i = 0; i < fp->len; i++) {
/*
@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
* of the JITed sequence remains unchanged.
*/
ctx->idx = tmp_idx;
} else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
tmp_idx = ctx->idx;
ctx->idx = addrs[i] / 4;
#ifdef CONFIG_PPC32
PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm);
PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm);
for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
EMIT(PPC_RAW_NOP());
#else
func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
PPC_LI64(b2p[insn[i].dst_reg], func_addr);
/* overwrite rest with nops */
for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
EMIT(PPC_RAW_NOP());
#endif
ctx->idx = tmp_idx;
i++;
}
}
@ -200,13 +217,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/*
* Do not touch the prologue and epilogue as they will remain
* unchanged. Only fix the branch target address for subprog
* calls in the body.
* calls in the body, and ldimm64 instructions.
*
* This does not change the offsets and lengths of the subprog
* call instruction sequences and hence, the size of the JITed
* image as well.
*/
bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
/* There is no need to perform the usual passes. */
goto skip_codegen_passes;

View file

@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
if (image && rel < 0x2000000 && rel >= -0x2000000) {
PPC_BL_ABS(func);
EMIT(PPC_RAW_NOP());
EMIT(PPC_RAW_NOP());
EMIT(PPC_RAW_NOP());
} else {
/* Load function address into r0 */
EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
@ -290,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
bool func_addr_fixed;
u64 func_addr;
u32 true_cond;
u32 tmp_idx;
int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@ -905,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
/* padding to allow full 4 instructions for later patching */
for (j = ctx->idx - tmp_idx; j < 4; j++)
EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;

View file

@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u64 imm64;
u32 true_cond;
u32 tmp_idx;
int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@ -633,17 +634,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
break;
case 64:
/*
* Way easier and faster(?) to store the value
* into stack and then use ldbrx
*
* ctx->seen will be reliable in pass2, but
* the instructions generated will remain the
* same across all passes
*/
/* Store the value to stack and then use byte-reverse loads */
PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
} else {
EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
}
break;
}
break;
@ -848,9 +853,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
tmp_idx = ctx->idx;
PPC_LI64(dst_reg, imm64);
/* padding to allow full 5 instructions for later patching */
for (j = ctx->idx - tmp_idx; j < 5; j++)
EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
PPC_LI64(dst_reg, imm64);
break;
/*

View file

@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
/*
* If the perf subsystem wants performance monitor interrupts as soon as
* possible (e.g., to sample the instruction address and stack chain),
* this should return true. The IRQ masking code can then enable MSR[EE]
* in some places (e.g., interrupt handlers) that allows PMI interrupts
* through to improve accuracy of profiles, at the cost of some performance.
*
* The PMU counters can be enabled by other means (e.g., sysfs raw SPR
* access), but in that case there is no need for prompt PMI handling.
*
* This currently returns true if any perf counter is being used. It
* could possibly return false if only events are being counted rather than
* samples being taken, but for now this is good enough.
*/
bool power_pmu_wants_prompt_pmi(void)
{
struct cpu_hw_events *cpuhw;
/*
* This could simply test local_paca->pmcregs_in_use if that were not
* under ifdef KVM.
*/
if (!ppmu)
return false;
cpuhw = this_cpu_ptr(&cpu_hw_events);
return cpuhw->n_events;
}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@ -2438,36 +2466,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_sample_event_took(sched_clock() - start_clock);
}
/*
* If the perf subsystem wants performance monitor interrupts as soon as
* possible (e.g., to sample the instruction address and stack chain),
* this should return true. The IRQ masking code can then enable MSR[EE]
* in some places (e.g., interrupt handlers) that allows PMI interrupts
* though to improve accuracy of profiles, at the cost of some performance.
*
* The PMU counters can be enabled by other means (e.g., sysfs raw SPR
* access), but in that case there is no need for prompt PMI handling.
*
* This currently returns true if any perf counter is being used. It
* could possibly return false if only events are being counted rather than
* samples being taken, but for now this is good enough.
*/
bool power_pmu_wants_prompt_pmi(void)
{
struct cpu_hw_events *cpuhw;
/*
* This could simply test local_paca->pmcregs_in_use if that were not
* under ifdef KVM.
*/
if (!ppmu)
return false;
cpuhw = this_cpu_ptr(&cpu_hw_events);
return cpuhw->n_events;
}
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);

View file

@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
{
struct pt_regs *regs;
long res;
long res = -EINVAL;
if (!try_get_task_stack(task))
return -EFAULT;
regs = task_pt_regs(task);
res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
if (regs)
res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
put_task_stack(task);
return res;

View file

@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx)
*/
struct task_struct *prev = (struct task_struct *)ctx[1];
struct task_struct *next = (struct task_struct *)ctx[2];
struct event event = {};
struct runq_event event = {};
u64 *tsp, delta_us;
long state;
u32 pid;

View file

@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void)
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
const struct event *e = data;
const struct runq_event *e = data;
struct tm *tm;
char ts[32];
time_t t;

View file

@ -4,7 +4,7 @@
#define TASK_COMM_LEN 16
struct event {
struct runq_event {
char task[TASK_COMM_LEN];
__u64 delta_us;
pid_t pid;